library(tidyverse)
-- Attaching packages ------------------------------------------------------------------------------------------ tidyverse 1.2.1 --
v ggplot2 3.1.0 v purrr 0.2.5
v tibble 2.0.1 v dplyr 0.7.8
v tidyr 0.8.2 v stringr 1.3.1
v readr 1.3.1 v forcats 0.3.0
-- Conflicts --------------------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(cowplot)
Attaching package: 'cowplot'
The following object is masked from 'package:ggplot2':
ggsave
library(GGally)
Attaching package: 'GGally'
The following object is masked from 'package:dplyr':
nasa
library(heatmaply)
Loading required package: plotly
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
Loading required package: viridis
Loading required package: viridisLite
======================
Welcome to heatmaply version 0.15.2
Type citation('heatmaply') for how to cite the package.
Type ?heatmaply for the main documentation.
The github page is: https://github.com/talgalili/heatmaply/
Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
Or contact: <tal.galili@gmail.com>
======================
library(sva)
Loading required package: mgcv
Loading required package: nlme
Attaching package: 'nlme'
The following object is masked from 'package:dplyr':
collapse
This is mgcv 1.8-26. For overview type 'help("mgcv-package")'.
Loading required package: genefilter
Attaching package: 'genefilter'
The following object is masked from 'package:readr':
spec
Loading required package: BiocParallel
library(limma)
library(broom)
library(ggridges)
Attaching package: 'ggridges'
The following object is masked from 'package:ggplot2':
scale_discrete_manual
library(KEGGREST)
library(ggthemes)
Attaching package: 'ggthemes'
The following object is masked from 'package:cowplot':
theme_map
theme_set(theme_minimal())
# Cells #
vpa.cell.neg.raw <- read_csv("./data/abundances/vpa_1and2_cells_target_negmode.csv")
Parsed with column specification:
cols(
.default = col_double(),
Samples = col_character(),
Mode = col_character(),
Type = col_character(),
Group = col_character(),
Treatment = col_character(),
Plate = col_character(),
Experiment = col_character()
)
See spec(...) for full column specifications.
vpa.cell.pos.raw <- read_csv("./data/abundances/vpa_1and2_cells_target_posmode.csv")
Parsed with column specification:
cols(
.default = col_double(),
Samples = col_character(),
Mode = col_character(),
Type = col_character(),
Group = col_character(),
Treatment = col_character(),
Plate = col_character(),
Experiment = col_character()
)
See spec(...) for full column specifications.
# Media #
vpa.med.neg.raw <- read_csv("./data/abundances/vpa_1and2_media_target_negmode.csv")
Parsed with column specification:
cols(
.default = col_double(),
Samples = col_character(),
Mode = col_character(),
Type = col_character(),
Group = col_character(),
Treatment = col_character(),
Plate = col_character(),
Experiment = col_character()
)
See spec(...) for full column specifications.
vpa.med.pos.raw <- read_csv("./data/abundances/vpa_1and2_media_target_posmode.csv")
Parsed with column specification:
cols(
.default = col_double(),
Samples = col_character(),
Mode = col_character(),
Type = col_character(),
Group = col_character(),
Treatment = col_character(),
Plate = col_character(),
Experiment = col_character()
)
See spec(...) for full column specifications.
# Cells #
vpa.cell.neg.compound.info <- read_csv("./data/compound_info/vpa_1and2_cells_target_negmode_cmpnd_info.csv")
Parsed with column specification:
cols(
compound_short = col_character(),
compound_full = col_character(),
formula = col_character(),
mass = col_double(),
rt = col_double(),
cas_id = col_character()
)
vpa.cell.pos.compound.info <- read_csv("./data/compound_info/vpa_1and2_cells_target_posmode_cmpnd_info.csv")
Parsed with column specification:
cols(
compound_short = col_character(),
compound_full = col_character(),
formula = col_character(),
mass = col_double(),
rt = col_double(),
cas_id = col_character()
)
# Media #
vpa.med.neg.compound.info <- read_csv("./data/compound_info/vpa_1and2_media_target_negmode_cmpnd_info.csv")
Parsed with column specification:
cols(
compound_short = col_character(),
compound_full = col_character(),
formula = col_character(),
mass = col_double(),
rt = col_double(),
cas_id = col_character()
)
vpa.med.pos.compound.info <- read_csv("./data/compound_info/vpa_1and2_media_target_posmode_cmpnd_info.csv")
Parsed with column specification:
cols(
compound_short = col_character(),
compound_full = col_character(),
formula = col_character(),
mass = col_double(),
rt = col_double(),
cas_id = col_character()
)
# Kegg/other ID reference #
cmpnd.id.db <- read_csv("./data/other/anp_db_compound_kegg.csv")
Parsed with column specification:
cols(
compound_full = col_character(),
cas_id = col_character(),
HMDB = col_character(),
Metlin = col_character(),
KEGG = col_character()
)
# run order and protein quant (cell samples)
# although run order is the same for media samples
vpa.cell.other.info <- read_csv("./data/other/vpa_exp1_other_info.csv")
Parsed with column specification:
cols(
Samples = col_character(),
prot_conc_ug_uL = col_double(),
run_order = col_double()
)
MissingPerSamplePlot <- function(raw.data, start.string) {
# Counts the number of missing/NA values per sample and
# percent compounds missing out of total number of compounds per sample
# Then passes the result into a vertical bar plot, where each
# bar represents a single sample and the size of the bar
# is the % of compounds missing
counted.na <- raw.data %>%
select(starts_with(start.string)) %>%
mutate(
count.na = apply(., 1, function(x) sum(is.na(x))),
percent.na = (count.na / ncol(raw.data %>% select(starts_with(start.string)))) * 100
) %>%
dplyr::select(count.na, percent.na) %>%
bind_cols(
raw.data %>%
select(Samples, Group)
) %>%
arrange(percent.na) %>%
mutate(f.order = factor(Samples, levels = Samples))
counted.na %>%
ggplot(aes(x = f.order, y = percent.na, fill = Group)) +
geom_bar(stat = "identity") +
geom_hline(yintercept = 20, color = "gray", size = 1, alpha = 0.8) +
coord_flip()+
xlab("Samples") +
ylab("Percent missing values in sample") +
theme(axis.text.y = element_text(size = 6))
}
MissingPerCompound <- function(raw.data, start.string){
# Function to count in how many experimental samples each compound is missing
# Also calculates the % missing:
# (# NA per compound across all experimental samples) * 100 / (tot num of samples)
raw.data %>%
filter(Group == "sample") %>%
select(Samples, starts_with(start.string)) %>%
gather(key = "Compound", value = "Abundance", -Samples) %>%
group_by(Compound) %>%
summarise(
na_count = sum(is.na(Abundance)),
n_samples = n(),
percent_na = (na_count * 100 / n_samples)
) %>%
filter(na_count > 0) %>%
arrange(desc(percent_na))
}
ReplaceNAwMinLogTransformSingle <- function(raw.dataframe, start.prefix) {
# Function to replace any NAs in each column with the minimum for that column,
# separately for each sample type.
# NA in negative control samples are replaced by 2.
# Then data is log2 transformed
smpls <- raw.dataframe %>%
filter(Group == "sample") %>%
dplyr::select(starts_with(start.prefix))
smpls.min <- lapply(smpls, min, na.rm = TRUE)
# replace the missing values in the real samples with the minimum of the samples
# then take the log
smpls.noNA <- raw.dataframe %>%
filter(Group == "sample") %>%
dplyr::select(Samples:Experiment) %>%
bind_cols(
smpls %>%
replace_na(replace = smpls.min) %>%
log2()
)
# QC
QC <- raw.dataframe %>%
filter(Group == "QC") %>%
dplyr::select(starts_with(start.prefix))
QC.min <- lapply(QC, min, na.rm = TRUE)
# replace the missing values in the QC with the minimum of the QC
# then take the log
QC.noNA <- raw.dataframe %>%
filter(Group == "QC") %>%
dplyr::select(Samples:Experiment) %>%
bind_cols(
QC %>%
replace_na(replace = QC.min) %>%
log2()
)
# replace the missing values in solv and empty samples with 2 - for PCA analysis
# then take the log
other.min <- setNames(
as.list(
rep(2, ncol(
raw.dataframe %>%
dplyr::select(starts_with(start.prefix))))
),
colnames(raw.dataframe %>% dplyr::select(starts_with(start.prefix)))
)
other.num.log <- raw.dataframe %>%
filter(Group != "sample" & Group != "QC") %>%
dplyr::select(Samples:Experiment) %>%
bind_cols(
raw.dataframe %>%
filter(Group != "sample" & Group != "QC") %>%
dplyr::select(starts_with(start.prefix)) %>%
replace_na(replace = other.min) %>%
log2()
)
# combine them together back into one data frame
all.noNA <- smpls.noNA %>%
bind_rows(QC.noNA) %>%
bind_rows(other.num.log)
}
HeatmapPrepAlt <- function(raw.data, start.prefix){
# function for preparing dara for heatmap viz
x <- raw.data %>%
select(starts_with(start.prefix)) %>%
scale(center = TRUE, scale = TRUE) %>%
as.matrix()
row.names(x) <- raw.data$Samples
return(x)
}
Q: What are the distributions of compound masses and retention times?
# bind all 4 compound info df into 1
full.vpa.cmpnd <- vpa.cell.neg.compound.info %>%
mutate(Set = "Cells / Neg") %>%
bind_rows(vpa.cell.pos.compound.info %>% mutate(Set = "Cells / Pos")) %>%
bind_rows(vpa.med.neg.compound.info %>% mutate(Set = "Media / Neg")) %>%
bind_rows(vpa.med.pos.compound.info %>% mutate(Set = "Media / Pos"))
full.vpa.cmpnd %>%
ggplot(aes(x = rt, y = mass)) +
geom_point(size = 3, alpha = 0.3) +
xlab("Retention Time (min)") +
ylab("Mass (Da)") +
ggtitle("Mass v RT\nVPA Only Exp") +
ylim(0, 1100)
full.vpa.cmpnd %>%
ggplot(aes(x = rt, y = mass, color = Set)) +
geom_point(size = 3, alpha = 0.5) +
xlab("Retention Time (min)") +
ylab("Mass (Da)") +
ggtitle("Mass v RT\nVPA Only Exp") +
facet_wrap(~ Set) +
ylim(0, 1100)
The compounds included in the Cells / Neg mode and Cells / Pos mode datasets loook to have a good spread of masses and retention times within the expected RT window of 0 to 29min and mass window of 50 to 1000Da. A smaller number of compounds were detected in the media samples, and the larger molecules that were detected in cells were not detected in media. This is not a surprising result, media in general is expected to have a lower number of compounds than the cells themselves, as cells produce a wide variety of molecules that are not exported to the media.
Q: Which compounds were found in one or more of the data types?
### vpa cell join ###
vpa.cell.cmpnd.join <- vpa.cell.neg.compound.info %>%
inner_join(vpa.cell.pos.compound.info, by = "cas_id", suffix = c(".c.n", ".c.p")) %>%
select(
contains("cas_id"), contains("short"),
contains("full"), starts_with("formula"),
starts_with("mass"), starts_with("rt")
)
# compound names - found in pos and neg mode / cells
print(vpa.cell.cmpnd.join$compound_full.c.n)
[1] "Glycine"
[2] "Acetoin"
[3] "Alanine"
[4] "Sarcosine"
[5] "Beta-Alanine"
[6] "2-Aminobutyric Acid"
[7] "BAIBA"
[8] "Serine"
[9] "Hypotaurine"
[10] "Uracil"
[11] "Creatinine"
[12] "5,6-Dihydrouracil"
[13] "Proline"
[14] "Valine"
[15] "Threonine"
[16] "Taurine"
[17] "4-Oxoproline"
[18] "Ketoleucine"
[19] "trans-4-Hydroxyproline"
[20] "Creatine"
[21] "Isoleucine"
[22] "Leucine"
[23] "Asparagine"
[24] "5-Hydroxyhexanoic Acid"
[25] "Aspartic Acid"
[26] "Adenine"
[27] "Hypoxanthine"
[28] "O-Phosphoethanolamine"
[29] "Glutamine"
[30] "Lysine"
[31] "N-Acetylserine"
[32] "Glutamic Acid"
[33] "Methionine"
[34] "D-Ribose"
[35] "Guanine"
[36] "Xanthine"
[37] "3-Sulfinoalanine"
[38] "Histidine"
[39] "Orotic Acid"
[40] "Allantoin"
[41] "Fucose"
[42] "Phenylalanine"
[43] "Pyridoxal"
[44] "Cysteic Acid"
[45] "Pyridoxine"
[46] "3-Methylhistidine"
[47] "G3P"
[48] "Glycerol 1-Phosphate / Glycerol 3-Phosphate"
[49] "N-Carbamoyl-L-aspartic Acid"
[50] "Tyrosine"
[51] "D-Galactitol"
[52] "Phosphocholine"
[53] "N-alpha-Acetylglutamine"
[54] "Tryptophan"
[55] "Phosphocreatine"
[56] "Glycerylphosphorylethanolamine"
[57] "O-Succinylhomoserine"
[58] "Pantothenic Acid"
[59] "GlcNAc"
[60] "Cystathionine"
[61] "Deoxycytidine"
[62] "Cytidine"
[63] "Uridine"
[64] "Palmitoleic Acid"
[65] "Glycerol 1-phosphoserine"
[66] "D-Glucose 6-phosphate"
[67] "Thiamine"
[68] "Inosine"
[69] "Myoinositol-methyl-phosphate"
[70] "Linoleic Acid"
[71] "Guanosine"
[72] "Xanthosine"
[73] "L-Arginosuccinic Acid"
[74] "Ricinoleic Acid"
[75] "Glutathione"
[76] "11Z,14Z-Eicosadienoic Acid (20:2 n-6)"
[77] "CMP"
[78] "UMP"
[79] "3-Phosphoglyceroinositol"
[80] "AMP"
[81] "GMP"
[82] "SAH"
[83] "CDP"
[84] "ADP"
[85] "GDP"
[86] "LysoPE(18:2)"
[87] "LysoPE(18:0)"
[88] "dTTP"
[89] "CTP"
[90] "UTP"
[91] "CDP-Choline"
[92] "dATP"
[93] "ATP"
[94] "GTP"
[95] "ADP-Ribose"
[96] "UDP-Galactose"
[97] "UDP-Glucose"
[98] "UDP-Glucuronic acid"
[99] "ADP-Glucose"
[100] "GDP-Glucose"
[101] "UDP-GalNAc"
[102] "UDP-GlcNAc"
[103] "GSSG"
[104] "NAD"
[105] "NADH"
[106] "NADP"
[107] "PC(36:4)"
[108] "FAD"
[109] "Acetyl-CoA"
# percent of cell / neg compounds found in cell / pos
round(nrow(vpa.cell.cmpnd.join) * 100 / nrow(vpa.cell.neg.compound.info), 1)
[1] 51.7
# percent of cell / neg compounds found in cell / pos
round(nrow(vpa.cell.cmpnd.join) * 100 / nrow(vpa.cell.pos.compound.info), 1)
[1] 61.9
vpa.cell.cmpnd.join %>%
select(contains("mass")) %>%
ggpairs()
# any rt inconsistencies?
vpa.cell.cmpnd.join %>%
select(starts_with("rt")) %>%
ggpairs()
### vpa media join ###
vpa.med.cmpnd.join <- vpa.med.neg.compound.info %>%
inner_join(vpa.med.pos.compound.info, by = "cas_id", suffix = c(".m.n", ".m.p")) %>%
select(
contains("cas_id"), contains("short"),
contains("full"), starts_with("formula"),
starts_with("mass"), starts_with("rt")
)
# compound names - found in pos and neg mode / media
print(vpa.med.cmpnd.join$compound_full.m.n)
[1] "Alanine" "Serine"
[3] "Uracil" "Valine"
[5] "Threonine" "Taurine"
[7] "Thymine" "4-Oxoproline"
[9] "Isoleucine" "Leucine"
[11] "Glutamine" "Methionine"
[13] "Xanthine" "Histidine"
[15] "Allantoin" "Phenylalanine"
[17] "Uric Acid" "D-Glucose"
[19] "Tyrosine" "D-Galactitol"
[21] "Cysteine-S-sulfonic Acid" "Tryptophan"
[23] "Pantothenic Acid" "Cytidine"
[25] "Uridine" "Inosine"
[27] "Guanosine" "Folic Acid"
# percent of media / neg compounds found in media / pos
round(nrow(vpa.med.cmpnd.join) * 100 / nrow(vpa.med.neg.compound.info), 1)
[1] 51.9
# percent of media / pos compounds found in media / neg
round(nrow(vpa.med.cmpnd.join) * 100 / nrow(vpa.med.pos.compound.info), 1)
[1] 50
### vpa all modes join ###
vpa.all.cmpnd.join <- vpa.cell.cmpnd.join %>%
inner_join(vpa.med.cmpnd.join, by = "cas_id") %>%
select(
contains("cas_id"), contains("short"),
contains("full"), starts_with("formula"),
starts_with("mass"), starts_with("rt")
)
nrow(vpa.all.cmpnd.join)
[1] 23
# check for any mass issues between all 4 modes
vpa.all.cmpnd.join %>%
select(contains("mass")) %>%
ggpairs()
# any rt inconsistencies?
vpa.all.cmpnd.join %>%
select(starts_with("rt")) %>%
ggpairs()
Q: Do any of the samples have greater than 20% missing (NA) compound abundances, out of all of the features in the dataset?
MissingPerSamplePlot(vpa.cell.neg.raw, "VPAnC") +
ggtitle("Missing Per Sample\nVPA-only / Cells / Neg Mode")
MissingPerSamplePlot(vpa.cell.pos.raw, "VPApC") +
ggtitle("Missing Per Sample\nVPA-only / Cells / Pos Mode")
MissingPerSamplePlot(vpa.med.neg.raw, "VPAnM") +
ggtitle("Missing Per Sample\nVPA-only / Media / Neg Mode")
MissingPerSamplePlot(vpa.med.pos.raw, "VPApM") +
ggtitle("Missing Per Sample\nVPA-only / Media / Pos Mode")
A: No, sample files across both datasets have very few missing values. The green-colored bars, marked “sample” are the actual experimental samples. Whereas the “solv”, or “solvent”, and “empty” samples are negative control samples that are expected to have many missing values and/or low compound abundances. They will be used to narrow down the list of features later on in the analysis.
Q: Are any of the compounds more than 20% missing in the experimental sample group? If there are any, they will be excluded from analysis.
Note: The MissingPerCompound function considers only the Group == "sample" samples.
MissingPerCompound(vpa.cell.neg.raw, "VPAnC") %>%
filter(percent_na > 20)
# A tibble: 0 x 4
# ... with 4 variables: Compound <chr>, na_count <int>, n_samples <int>,
# percent_na <dbl>
(vpa.cell.pos.cmpnd.to.excl<- MissingPerCompound(vpa.cell.pos.raw, "VPApC") %>%
filter(percent_na > 20))
# A tibble: 1 x 4
Compound na_count n_samples percent_na
<chr> <int> <int> <dbl>
1 VPApC110 9 23 39.1
MissingPerCompound(vpa.med.neg.raw, "VPAnM") %>%
filter(percent_na > 20)
# A tibble: 0 x 4
# ... with 4 variables: Compound <chr>, na_count <int>, n_samples <int>,
# percent_na <dbl>
(vpa.med.pos.cmpnd.to.excl <- MissingPerCompound(vpa.med.pos.raw, "VPApM") %>%
filter(percent_na > 20))
# A tibble: 1 x 4
Compound na_count n_samples percent_na
<chr> <int> <int> <dbl>
1 VPApM34 6 24 25
A: No compounds need to be excluded from the Cell / Neg Mode and Media / Neg Mode datasets, but 1 compound each will be excluded from the Cell / Pos Mode and Media / Pos Mode datasets.
# get the mean abundance of each compound, grouped by solvent vs empty sample vs experimental sample
vpa.cell.neg.raw.grp.mean <- vpa.cell.neg.raw %>%
group_by(Group) %>%
summarize_at(vars(matches("VPAnC")), mean, na.rm = TRUE) %>%
gather(key = "Compound", value = "Grp_mean_abun", -Group)
# plot the log2 density distribution of the means
vpa.cell.neg.raw.grp.mean %>%
ggplot(aes(log2(Grp_mean_abun), color = Group)) +
geom_density(size = 2, alpha = 0.8) +
ggtitle("Distribution of compound means\nVPA-only / Cells / Negative Mode\nGrouped by sample type")
Warning: Removed 46 rows containing non-finite values (stat_density).
# for plotting purposes, to make the plot neater
vpa.cell.neg.raw.grp.mean.order <- vpa.cell.neg.raw.grp.mean %>%
filter(Group == "sample") %>%
arrange(Grp_mean_abun)
vpa.cell.neg.raw %>%
select(Samples, Group, starts_with("VPAnC")) %>%
gather("Compound", value = "Abundance", -c(Samples, Group)) %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.cell.neg.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Abundance), color = Group, group = Samples)) +
geom_line(alpha = 0.1, size = 1) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
# overlay the group averages
geom_line(
data = vpa.cell.neg.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.cell.neg.raw.grp.mean.order$Compound)),
aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group),
size = 0.5
) +
ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nVPA-only / Cells / Negative Mode")
Warning: Removed 26 rows containing missing values (geom_path).
Warning: Removed 2 rows containing missing values (geom_path).
# compound mean by group only, ordered by increasing abundance in the experimental samples
vpa.cell.neg.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.cell.neg.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group)) +
geom_point(size = 1, alpha = 0.8) +
geom_line(alpha = 0.8) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
ylab("log2(Sample Type Mean)") +
ggtitle("Profile Plot of compound means by sample type only\nVPA-only / Cells / Negative Mode")
Warning: Removed 46 rows containing missing values (geom_point).
Warning: Removed 2 rows containing missing values (geom_path).
vpa.cell.neg.raw.grp.diff <- vpa.cell.neg.raw.grp.mean %>%
spread(Group, Grp_mean_abun) %>%
mutate(
smpl_empty_diff = sample / empty,
smpl_solv_diff = sample / solv,
empty_solv_diff = empty / solv
)
vpa.cell.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff))) +
geom_histogram(bins = 50)
Warning: Removed 16 rows containing non-finite values (stat_bin).
vpa.cell.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_solv_diff))) +
geom_histogram(bins = 50)
Warning: Removed 30 rows containing non-finite values (stat_bin).
vpa.cell.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff), log2(smpl_solv_diff))) +
geom_point(size = 2, alpha = 0.5) +
xlim(-3, 13.5) +
ylim(-3, 13.5) +
# abline in pink
geom_abline(intercept = 0, slope = 1, color = "#CC79A7", size = 2, alpha = 0.6) +
# lm line in green
geom_smooth(method = "lm", color = "#009E73", size = 2, alpha = 0.6, se = FALSE) +
xlab("log2([Sample Compound Mean] / [Empty Compound Mean])") +
ylab("log2([Sample Compound Mean] / [Solvent Compound Mean])") +
ggtitle("Background signal\nRaw Data / Cells / Neg Mode")
Warning: Removed 31 rows containing non-finite values (stat_smooth).
Warning: Removed 31 rows containing missing values (geom_point).
# include compounds with FC > 2.5 or FC is NA (indication of NA in solv or empty)
vpa.cell.neg.cmpnd.to.incl <- vpa.cell.neg.raw.grp.diff %>%
filter((smpl_solv_diff > 2.5 & smpl_empty_diff > 2.5) | is.na(smpl_solv_diff) | is.na(smpl_empty_diff))
# how many compound were there in the raw negative mode dataset?
nrow(vpa.cell.neg.raw.grp.diff)
[1] 211
# how many compounds are retained for further analysis?
nrow(vpa.cell.neg.cmpnd.to.incl)
[1] 179
vpa.cell.pos.raw.grp.mean <- vpa.cell.pos.raw %>%
group_by(Group) %>%
summarize_at(vars(matches("VPApC")), mean, na.rm = TRUE) %>%
gather(key = "Compound", value = "Grp_mean_abun", -Group)
vpa.cell.pos.raw.grp.mean %>%
ggplot(aes(log2(Grp_mean_abun), color = Group)) +
geom_density(size = 2, alpha = 0.8) +
ggtitle("Distribution of compound means\nVPA-only / Cells / Positive Mode\nGrouped by sample type")
Warning: Removed 39 rows containing non-finite values (stat_density).
vpa.cell.pos.raw.grp.mean.order <- vpa.cell.pos.raw.grp.mean %>%
filter(Group == "sample") %>%
arrange(Grp_mean_abun)
vpa.cell.pos.raw %>%
select(Samples, Group, starts_with("VPApC")) %>%
gather("Compound", value = "Abundance", -c(Samples, Group)) %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.cell.pos.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Abundance), color = Group, group = Samples)) +
geom_line(alpha = 0.1, size = 1) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
# overlay the group averages
geom_line(
data = vpa.cell.pos.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.cell.pos.raw.grp.mean.order$Compound)),
aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group),
size = 0.5
) +
ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nVPA-only / Cells / Positive Mode")
vpa.cell.pos.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.cell.pos.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group)) +
geom_point(size = 1, alpha = 0.8) +
geom_line(alpha = 0.8) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
ylab("log2(Sample Type Mean)") +
ggtitle("Profile Plot of compound means by sample type only\nVPA-only / Cells / Positive Mode")
Warning: Removed 39 rows containing missing values (geom_point).
vpa.cell.pos.raw.grp.diff <- vpa.cell.pos.raw.grp.mean %>%
spread(Group, Grp_mean_abun) %>%
mutate(
smpl_empty_diff = sample / empty,
smpl_solv_diff = sample / solv,
empty_solv_diff = empty / solv
)
vpa.cell.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff))) +
geom_histogram(bins = 50)
Warning: Removed 14 rows containing non-finite values (stat_bin).
vpa.cell.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_solv_diff))) +
geom_histogram(bins = 50)
Warning: Removed 25 rows containing non-finite values (stat_bin).
vpa.cell.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff), log2(smpl_solv_diff))) +
geom_point(size = 2, alpha = 0.5) +
xlim(-1, 15) +
ylim(-1, 15) +
geom_abline(intercept = 0, slope = 1, color = "#CC79A7", size = 2, alpha = 0.6) +
geom_smooth(method = "lm", color = "#009E73", size = 2, alpha = 0.6, se = FALSE) +
xlab("log2([Sample Compound Mean] / [Empty Compound Mean])") +
ylab("log2([Sample Compound Mean] / [Solvent Compound Mean])") +
ggtitle("Background signal\nRaw Data / Cells / Pos Mode")
Warning: Removed 26 rows containing non-finite values (stat_smooth).
Warning: Removed 26 rows containing missing values (geom_point).
# include compounds with FC > 2.5 or FC is NA (indication of NA in solv or empty)
vpa.cell.pos.cmpnd.to.incl <- vpa.cell.pos.raw.grp.diff %>%
filter((smpl_solv_diff > 2.5 & smpl_empty_diff > 2.5) | is.na(smpl_solv_diff) | is.na(smpl_empty_diff)) %>%
filter(!(Compound %in% vpa.cell.pos.cmpnd.to.excl$Compound))
# how many compound were there in the raw dataset?
nrow(vpa.cell.pos.raw.grp.diff)
[1] 176
# how many compounds are retained for further analysis?
nrow(vpa.cell.pos.cmpnd.to.incl)
[1] 142
vpa.med.neg.raw.grp.mean <- vpa.med.neg.raw %>%
group_by(Group) %>%
summarize_at(vars(matches("VPAnM")), mean, na.rm = TRUE) %>%
gather(key = "Compound", value = "Grp_mean_abun", -Group)
vpa.med.neg.raw.grp.mean %>%
ggplot(aes(log2(Grp_mean_abun), color = Group)) +
geom_density(size = 2, alpha = 0.8) +
ggtitle("Distribution of compound means\nVPA-only / Media / Negative Mode\nGrouped by sample type")
vpa.med.neg.raw.grp.mean.order <- vpa.med.neg.raw.grp.mean %>%
filter(Group == "empty") %>%
arrange(Grp_mean_abun)
vpa.med.neg.raw %>%
select(Samples, Group, starts_with("VPAnM")) %>%
gather("Compound", value = "Abundance", -c(Samples, Group)) %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.med.neg.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Abundance), color = Group, group = Samples)) +
geom_line(alpha = 0.1, size = 1.5) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
# overlay the group averages
geom_line(
data = vpa.med.neg.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.med.neg.raw.grp.mean.order$Compound)),
aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group),
size = 0.5
) +
ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nVPA-only / Media / Negative Mode")
Warning: Removed 2 rows containing missing values (geom_path).
vpa.med.neg.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.med.neg.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group)) +
geom_point(size = 1, alpha = 0.8) +
geom_line(alpha = 0.8) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
ylab("log2(Sample Type Mean)")
vpa.med.neg.raw.grp.diff <- vpa.med.neg.raw.grp.mean %>%
spread(Group, Grp_mean_abun) %>%
mutate(
smpl_empty_diff = sample / empty,
smpl_solv_diff = sample / solv,
empty_solv_diff = empty / solv
)
vpa.med.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff))) +
geom_histogram(bins = 25)
vpa.med.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_solv_diff))) +
geom_histogram(bins = 25)
vpa.med.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff), log2(smpl_solv_diff))) +
geom_point(size = 2, alpha = 0.5) +
xlim(-4, 4) +
ylim(-1, 4) +
# abline in pink
geom_abline(intercept = 0, slope = 1, color = "#CC79A7", size = 2, alpha = 0.6) +
# lm line in green
geom_smooth(method = "lm", color = "#009E73", size = 2, alpha = 0.6, se = FALSE) +
xlab("log2([Sample Compound Mean] / [Empty Compound Mean])") +
ylab("log2([Sample Compound Mean] / [Solvent Compound Mean])") +
ggtitle("Background signal\nRaw Data / Media / Neg Mode")
Warning: Removed 12 rows containing non-finite values (stat_smooth).
Warning: Removed 12 rows containing missing values (geom_point).
# include compounds with FC > 2.5 or FC is NA (indication of NA in solv)
vpa.med.neg.cmpnd.to.incl <- vpa.med.neg.raw.grp.diff %>%
filter(smpl_solv_diff > 2.5 | is.na(smpl_solv_diff))
# how many compound were there in the raw dataset?
nrow(vpa.med.neg.raw.grp.diff)
[1] 54
# how many compounds are retained for further analysis?
nrow(vpa.med.neg.cmpnd.to.incl)
[1] 41
vpa.med.pos.raw.grp.mean <- vpa.med.pos.raw %>%
group_by(Group) %>%
summarize_at(vars(matches("VPApM")), mean, na.rm = TRUE) %>%
gather(key = "Compound", value = "Grp_mean_abun", -Group)
vpa.med.pos.raw.grp.mean %>%
ggplot(aes(log2(Grp_mean_abun), color = Group)) +
geom_density(size = 2, alpha = 0.8) +
ggtitle("Distribution of compound means\nVPA-only / Media / Positive Mode\nGrouped by sample type")
Warning: Removed 3 rows containing non-finite values (stat_density).
vpa.med.pos.raw.grp.mean.order <- vpa.med.pos.raw.grp.mean %>%
filter(Group == "empty") %>%
arrange(Grp_mean_abun)
vpa.med.pos.raw %>%
select(Samples, Group, starts_with("VPApM")) %>%
gather("Compound", value = "Abundance", -c(Samples, Group)) %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.med.pos.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Abundance), color = Group, group = Samples)) +
geom_line(alpha = 0.1, size = 1.5) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
# overlay the group averages
geom_line(
data = vpa.med.pos.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.med.pos.raw.grp.mean.order$Compound)),
aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group),
size = 0.5
) +
ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nVPA-only / Media / Positive Mode")
Warning: Removed 3 rows containing missing values (geom_path).
Warning: Removed 1 rows containing missing values (geom_path).
vpa.med.pos.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.med.pos.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group)) +
geom_point(size = 1, alpha = 0.8) +
geom_line(alpha = 0.8) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
ylab("log2(Sample Type Mean)")
Warning: Removed 3 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_path).
vpa.med.pos.raw.grp.diff <- vpa.med.pos.raw.grp.mean %>%
spread(Group, Grp_mean_abun) %>%
mutate(
smpl_empty_diff = sample / empty,
smpl_solv_diff = sample / solv,
empty_solv_diff = empty / solv
)
vpa.med.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff))) +
geom_histogram(bins = 25)
vpa.med.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_solv_diff))) +
geom_histogram(bins = 25)
Warning: Removed 3 rows containing non-finite values (stat_bin).
vpa.med.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff), log2(smpl_solv_diff))) +
geom_point(size = 2, alpha = 0.5) +
xlim(-6, 2) +
ylim(-1, 7) +
# abline in pink
geom_abline(intercept = 0, slope = 1, color = "#CC79A7", size = 2, alpha = 0.6) +
# lm line in green
geom_smooth(method = "lm", color = "#009E73", size = 2, alpha = 0.6, se = FALSE) +
xlab("log2([Sample Compound Mean] / [Empty Compound Mean])") +
ylab("log2([Sample Compound Mean] / [Solvent Compound Mean])") +
ggtitle("Background signal\nRaw Data / Media / pos Mode")
Warning: Removed 3 rows containing non-finite values (stat_smooth).
Warning: Removed 3 rows containing missing values (geom_point).
# include compounds with FC > 2.5 or FC is NA (indication of NA in solv)
vpa.med.pos.cmpnd.to.incl <- vpa.med.pos.raw.grp.diff %>%
filter(smpl_solv_diff > 2.5 | is.na(smpl_solv_diff)) %>%
filter(!(Compound %in% vpa.med.pos.cmpnd.to.excl$Compound))
# how many compound were there in the raw dataset?
nrow(vpa.med.pos.raw.grp.diff)
[1] 56
# how many compounds are retained for further analysis?
nrow(vpa.med.pos.cmpnd.to.incl)
[1] 36
# replace missing with minimum for each Group in each dataset and log2() transform the data:
# exclude compound that have a >20% NA count across samples
vpa.cell.neg.noNA <- vpa.cell.neg.raw %>%
select(Samples:Experiment, one_of(vpa.cell.neg.cmpnd.to.incl$Compound)) %>%
ReplaceNAwMinLogTransformSingle("VPAnC")
vpa.cell.pos.noNA <- vpa.cell.pos.raw %>%
select(Samples:Experiment, one_of(vpa.cell.pos.cmpnd.to.incl$Compound)) %>%
ReplaceNAwMinLogTransformSingle("VPApC")
vpa.med.neg.noNA <- vpa.med.neg.raw %>%
select(Samples:Experiment, one_of(vpa.med.neg.cmpnd.to.incl$Compound)) %>%
ReplaceNAwMinLogTransformSingle("VPAnM")
vpa.med.pos.noNA <- vpa.med.pos.raw %>%
select(Samples:Experiment, one_of(vpa.med.pos.cmpnd.to.incl$Compound)) %>%
ReplaceNAwMinLogTransformSingle("VPApM")
vpa.cell.neg.noNA.gathered <- vpa.cell.neg.noNA %>%
gather(
key = "Metabolite", "Abundance",
which(colnames(vpa.cell.neg.noNA) == "VPAnC1"):ncol(vpa.cell.neg.noNA)
)
# plot all abundances in a sample, grouped by sample as a boxplot
vpa.cell.neg.noNA.gathered %>%
ggplot(aes(Samples, Abundance, fill = Group)) +
geom_boxplot() +
geom_boxplot(aes(color = Group), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 90)) +
ylab("log2(Abundance)") +
ggtitle("Boxplot of compound abundances\nAll samples\nVPA-only / Cells / Negative Mode")
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
# same data format, but as ridge plots
vpa.cell.neg.noNA.gathered %>%
ggplot(aes(y = Samples, x = Abundance, fill = Group)) +
geom_density_ridges(scale = 15) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nAll samples\nVPA-only / Cells / Negative Mode")
Picking joint bandwidth of 1.03
# experimental samples only
vpa.cell.neg.noNA.gathered %>%
filter(Group == "sample") %>%
ggplot(aes(y = Samples, x = Abundance, fill = Treatment)) +
geom_density_ridges(scale = 10) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nExperimental samples only\nVPA-only / Cells / Negative Mode")
Picking joint bandwidth of 0.897
# overlay the distributions for another look
vpa.cell.neg.noNA.gathered %>%
filter(Group == "sample") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75) +
xlab("log2(Abundance)") +
ggtitle("Density plot of compound abundances\nExperimental samples only\nVPA-only / Cells / Negative Mode")
# relationship with protein quant and run order?
vpa.cell.neg.noNA.gathered %>%
inner_join(vpa.cell.other.info, by = "Samples") %>%
ggplot(aes(run_order, Abundance)) +
geom_jitter(width = 0.1, size = 1, alpha = 0.6, aes(color = Treatment)) +
geom_smooth(method = "lm", color = "black", alpha = 0.8, se = FALSE)
vpa.cell.neg.noNA.gathered %>%
inner_join(vpa.cell.other.info, by = "Samples") %>%
ggplot(aes(prot_conc_ug_uL, Abundance)) +
geom_jitter(width = 0.005, size = 1, alpha = 0.6, aes(color = Treatment)) +
geom_smooth(color = "black", alpha = 0.8, se = FALSE)
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
vpa.full.mass.v.rt.plot <- full.vpa.cmpnd %>%
ggplot(aes(x = rt, y = mass, color = Set)) +
geom_point(size = 2, alpha = 0.5) +
xlab("Retention Time (min)") +
ylab("Mass (Da)") +
scale_color_stata() +
facet_wrap(~ Set) +
ylim(0, 1100) +
theme_minimal() +
theme(legend.position = "none")
vpa.cell.neg.smpl.miss <- MissingPerSamplePlot(vpa.cell.neg.raw, "VPAnC") +
theme_minimal() +
scale_fill_brewer(palette = "Dark2", labels = c("Empty", "QC", "Exp Smpl", "Solv")) +
theme(
axis.text.y = element_text(size = 6),
legend.title = element_blank()
) +
ylab("Percent Missing")
vpa.cell.neg.prof.plot <- vpa.cell.neg.raw %>%
select(Samples, Group, starts_with("VPAnC")) %>%
gather("Compound", value = "Abundance", -c(Samples, Group)) %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.cell.neg.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Abundance), color = Group, group = Samples)) +
geom_line(alpha = 0.1, size = 1.5) +
theme_minimal() +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.title = element_blank()
) +
xlab("Compound") +
ylab("log2(Sample Type Mean Abundance)") +
# overlay the group averages
geom_point(
data = vpa.cell.neg.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.cell.neg.raw.grp.mean.order$Compound)),
aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group),
size = 1.5
) +
scale_color_brewer(palette = "Dark2", labels = c("Empty", "QC", "Exp Smpl", "Solv"))
vpa.cell.neg.solv.v.empty.plot <- vpa.cell.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff), log2(smpl_solv_diff))) +
geom_abline(intercept = 0, slope = 1, size = 1, alpha = 0.6) +
geom_point(size = 2, alpha = 0.5) +
xlim(-3, 13.5) +
ylim(-3, 13.5) +
xlab("log2(Empty Ratio)") +
ylab("log2(Solv Ratio)") +
theme_minimal()
vpa.cell.neg.treat.dist.plot <- vpa.cell.neg.noNA.gathered %>%
filter(Group == "sample") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75, trim = TRUE) +
xlab("log2(Abundance)") +
theme_minimal() +
scale_color_manual(values = c("#56B4E9", "#E69F00"), labels = c("Control", "VPA")) +
ylab("Density")
vpa.full.mass.v.rt.plot
vpa.cell.neg.smpl.miss
vpa.cell.neg.prof.plot
Warning: Removed 26 rows containing missing values (geom_path).
Warning: Removed 46 rows containing missing values (geom_point).
vpa.cell.neg.solv.v.empty.plot
Warning: Removed 31 rows containing missing values (geom_point).
vpa.cell.neg.treat.dist.plot
overview.fig.chap3 <- plot_grid(
plot_grid(
vpa.full.mass.v.rt.plot,
vpa.cell.neg.smpl.miss,
ncol = 2,
labels = c("A", "B")
),
vpa.cell.neg.prof.plot,
plot_grid(
vpa.cell.neg.solv.v.empty.plot,
vpa.cell.neg.treat.dist.plot,
ncol = 2,
labels = c("D", "E")
),
ncol = 1,
labels = c("", "C", ""),
rel_heights = c(1.5, 1.5, 1)
)
Warning: Removed 31 rows containing missing values (geom_point).
Warning: Removed 26 rows containing missing values (geom_path).
Warning: Removed 46 rows containing missing values (geom_point).
overview.fig.chap3
#save_plot("./results/vpa_only_exp1_overview_fig.png", overview.fig.chap3 , base_width = 8, base_height = 10)
vpa.cell.pos.noNA.gathered <- vpa.cell.pos.noNA %>%
gather(
key = "Metabolite", "Abundance",
which(colnames(vpa.cell.pos.noNA) == "VPApC1"):ncol(vpa.cell.pos.noNA)
)
# plot all abundances in a sample, grouped by sample as a boxplot
vpa.cell.pos.noNA.gathered %>%
ggplot(aes(Samples, Abundance, fill = Group)) +
geom_boxplot() +
geom_boxplot(aes(color = Group), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 90)) +
ylab("log2(Abundance)") +
ggtitle("Boxplot of compound abundances\nAll samples\nVPA-only / Cells / Positive Mode")
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
# same data format, but as ridge plots
vpa.cell.pos.noNA.gathered %>%
ggplot(aes(y = Samples, x = Abundance, fill = Group)) +
geom_density_ridges(scale = 15) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nAll samples\nVPA-only / Cells / Positive Mode")
Picking joint bandwidth of 1.18
# experimental samples only
vpa.cell.pos.noNA.gathered %>%
filter(Group == "sample") %>%
ggplot(aes(y = Samples, x = Abundance, fill = Treatment)) +
geom_density_ridges(scale = 10) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nExperimental samples only\nVPA-only / Cells / Positive Mode")
Picking joint bandwidth of 1.09
# overlay the distributions for another look
vpa.cell.pos.noNA.gathered %>%
filter(Group == "sample") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75) +
xlab("log2(Abundance)") +
ggtitle("Density plot of compound abundances\nExperimental samples only\nVPA-only / Cells / Positive Mode")
# relationship with protein quant and run order?
vpa.cell.pos.noNA.gathered %>%
inner_join(vpa.cell.other.info, by = "Samples") %>%
ggplot(aes(run_order, Abundance)) +
geom_jitter(width = 0.1, size = 1, alpha = 0.6, aes(color = Treatment)) +
geom_smooth(method = "lm", color = "black", alpha = 0.8, se = FALSE)
vpa.cell.pos.noNA.gathered %>%
inner_join(vpa.cell.other.info, by = "Samples") %>%
ggplot(aes(prot_conc_ug_uL, Abundance)) +
geom_jitter(width = 0.005, size = 1, alpha = 0.6, aes(color = Treatment)) +
geom_smooth(color = "black", alpha = 0.8, se = FALSE)
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
vpa.med.neg.noNA.gathered <- vpa.med.neg.noNA %>%
gather(
key = "Metabolite", "Abundance",
which(colnames(vpa.med.neg.noNA) == "VPAnM1"):ncol(vpa.med.neg.noNA)
)
# plot all abundances in a sample, grouped by sample as a boxplot
vpa.med.neg.noNA.gathered %>%
ggplot(aes(Samples, Abundance, fill = Group)) +
geom_boxplot() +
geom_boxplot(aes(color = Group), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 90)) +
ylab("log2(Abundance)") +
ggtitle("Boxplot of compound abundances\nAll samples\nVPA-only / Media / Negative Mode")
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
# same data format, but as ridge plots
vpa.med.neg.noNA.gathered %>%
ggplot(aes(y = Samples, x = Abundance, fill = Group)) +
geom_density_ridges(scale = 15) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nAll samples\nVPA-only / Media / Negative Mode")
Picking joint bandwidth of 0.936
# experimental samples only
vpa.med.neg.noNA.gathered %>%
filter(Group == "sample") %>%
ggplot(aes(y = Samples, x = Abundance, fill = Treatment)) +
geom_density_ridges(scale = 10) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nExperimental samples only\nVPA-only / Media / Negative Mode")
Picking joint bandwidth of 0.918
# overlay the distributions for another look
vpa.med.neg.noNA.gathered %>%
filter(Group == "sample") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75) +
xlab("log2(Abundance)") +
ggtitle("Density plot of compound abundances\nExperimental samples only\nVPA-only / Media / Negative Mode")
# relationship with protein quant and run order?
vpa.med.neg.noNA.gathered %>%
inner_join(vpa.cell.other.info, by = "Samples") %>%
ggplot(aes(run_order, Abundance)) +
geom_jitter(width = 0.1, size = 1, alpha = 0.6, aes(color = Treatment)) +
geom_smooth(method = "lm", color = "black", alpha = 0.8, se = FALSE)
vpa.med.neg.noNA.gathered %>%
inner_join(vpa.cell.other.info, by = "Samples") %>%
ggplot(aes(prot_conc_ug_uL, Abundance)) +
geom_jitter(width = 0.005, size = 1, alpha = 0.6, aes(color = Treatment)) +
geom_smooth(color = "black", alpha = 0.8, se = FALSE)
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
vpa.med.pos.noNA.gathered <- vpa.med.pos.noNA %>%
gather(
key = "Metabolite", "Abundance",
which(colnames(vpa.med.pos.noNA) == "VPApM1"):ncol(vpa.med.pos.noNA)
)
# plot all abundances in a sample, grouped by sample as a boxplot
vpa.med.pos.noNA.gathered %>%
ggplot(aes(Samples, Abundance, fill = Group)) +
geom_boxplot() +
geom_boxplot(aes(color = Group), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 90)) +
ylab("log2(Abundance)") +
ggtitle("Boxplot of compound abundances\nAll samples\nVPA-only / Media / Positive Mode")
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
Warning: Removed 1 rows containing missing values (geom_segment).
# same data format, but as ridge plots
vpa.med.pos.noNA.gathered %>%
ggplot(aes(y = Samples, x = Abundance, fill = Group)) +
geom_density_ridges(scale = 15) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nAll samples\nVPA-only / Media / Positive Mode")
Picking joint bandwidth of 1.14
# experimental samples only
vpa.med.pos.noNA.gathered %>%
filter(Group == "sample") %>%
ggplot(aes(y = Samples, x = Abundance, fill = Treatment)) +
geom_density_ridges(scale = 10) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nExperimental samples only\nVPA-only / Media / Positive Mode")
Picking joint bandwidth of 1.13
# overlay the distributions for another look
vpa.med.pos.noNA.gathered %>%
filter(Group == "sample") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75) +
xlab("log2(Abundance)") +
ggtitle("Density plot of compound abundances\nExperimental samples only\nVPA-only / Media / Positive Mode")
vpa.med.pos.noNA.gathered %>%
inner_join(vpa.cell.other.info, by = "Samples") %>%
ggplot(aes(run_order, Abundance)) +
geom_jitter(width = 0.1, size = 1, alpha = 0.6, aes(color = Treatment)) +
geom_smooth(method = "lm", color = "black", alpha = 0.8, se = FALSE)
vpa.med.pos.noNA.gathered %>%
inner_join(vpa.cell.other.info, by = "Samples") %>%
ggplot(aes(prot_conc_ug_uL, Abundance)) +
geom_jitter(width = 0.005, size = 1, alpha = 0.6, aes(color = Treatment)) +
geom_smooth(color = "black", alpha = 0.8, se = FALSE)
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
Some plots to understand the relationship between the samples, QC samples, solvent, and empty samples in some cases.
### PCA on all Samples ###
vpa.cell.neg.full.pca <- vpa.cell.neg.noNA %>%
select(starts_with("VPAnC")) %>%
# good idea to center data before pca, but scaling should not be necessary
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
# plot variance explained by each new principal component
plot(
(vpa.cell.neg.full.pca$sdev ^ 2) * 100 / sum(vpa.cell.neg.full.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nAll samples only\nVPA-only / Cells / Negative Mode",
type = "b"
)
vpa.cell.neg.full.pca.x <- as.data.frame(vpa.cell.neg.full.pca$x)
row.names(vpa.cell.neg.full.pca.x) <- vpa.cell.neg.noNA$Samples
vpa.cell.neg.full.pca.x <- vpa.cell.neg.full.pca.x %>%
bind_cols(vpa.cell.neg.noNA %>% select(Group:Experiment))
vpa.cell.neg.full.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (90.5% Var)") +
ylab("PC2 (3.1%)") +
ggtitle("Principal Component Analysis\nAll Samples\nVPA-only / Cells / Negative Mode")
### Samples and QC ###
vpa.cell.neg.smpl.QC.pca <- vpa.cell.neg.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(starts_with("VPAnC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vpa.cell.neg.smpl.QC.pca$sdev ^ 2) * 100 / sum(vpa.cell.neg.smpl.QC.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nSamples and QC\nVPA-only / Cells / Negative Mode",
type = "b"
)
vpa.cell.neg.smpl.QC.pca.x <- as.data.frame(vpa.cell.neg.smpl.QC.pca$x)
vpa.cell.neg.smpl.QC.pca.x <- vpa.cell.neg.smpl.QC.pca.x %>%
bind_cols(
vpa.cell.neg.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(Samples, Group:Experiment)
)
row.names(vpa.cell.neg.smpl.QC.pca.x) <- vpa.cell.neg.smpl.QC.pca.x$Samples
vpa.cell.neg.smpl.QC.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (35.1% Var)") +
ylab("PC2 (19.7%)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA-only / Cells / Negative Mode")
vpa.cell.neg.smpl.QC.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (16.1% Var)") +
ylab("PC4 (6.4%)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA-only / Cells / Negative Mode")
### Experimental Samples Only ###
vpa.cell.neg.smpl.pca <- vpa.cell.neg.noNA %>%
filter(Group == "sample") %>%
select(starts_with("VPAnC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vpa.cell.neg.smpl.pca$sdev ^ 2) * 100 / sum(vpa.cell.neg.smpl.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nExperimental samples only\nVPA-only / Cells / Negative Mode",
type = "b"
)
vpa.cell.neg.smpl.pca.x <- as.data.frame(vpa.cell.neg.smpl.pca$x)
vpa.cell.neg.smpl.pca.x <- vpa.cell.neg.smpl.pca.x %>%
bind_cols(
vpa.cell.neg.noNA %>%
filter(Group == "sample") %>%
select(Samples, Group:Experiment)
)
row.names(vpa.cell.neg.smpl.pca.x) <- vpa.cell.neg.smpl.pca.x$Samples
vpa.cell.neg.smpl.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (34.3% Var)") +
ylab("PC2 (23.8%)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA-only / Cells / Negative Mode")
vpa.cell.neg.smpl.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (16.6% Var)") +
ylab("PC4 (7.1%)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA-only / Cells / Negative Mode")
### PCA on all Samples ###
vpa.cell.pos.full.pca <- vpa.cell.pos.noNA %>%
select(starts_with("VPApC")) %>%
# good idea to center data before pca, but scaling should not be necessary
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
# plot variance explained by each new principal component
plot(
(vpa.cell.pos.full.pca$sdev ^ 2) * 100 / sum(vpa.cell.pos.full.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nAll samples only\nVPA-only / Cells / Positive Mode",
type = "b"
)
vpa.cell.pos.full.pca.x <- as.data.frame(vpa.cell.pos.full.pca$x)
row.names(vpa.cell.pos.full.pca.x) <- vpa.cell.pos.noNA$Samples
vpa.cell.pos.full.pca.x <- vpa.cell.pos.full.pca.x %>%
bind_cols(vpa.cell.pos.noNA %>% select(Group:Experiment))
vpa.cell.pos.full.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (90.0% Var)") +
ylab("PC2 (4.2%)") +
ggtitle("Principal Component Analysis\nAll Samples\nVPA-only / Cells / Positive Mode")
### Samples and QC ###
vpa.cell.pos.smpl.QC.pca <- vpa.cell.pos.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(starts_with("VPApC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vpa.cell.pos.smpl.QC.pca$sdev ^ 2) * 100 / sum(vpa.cell.pos.smpl.QC.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nSamples and QC\nVPA-only / Cells / Positive Mode",
type = "b"
)
vpa.cell.pos.smpl.QC.pca.x <- as.data.frame(vpa.cell.pos.smpl.QC.pca$x)
vpa.cell.pos.smpl.QC.pca.x <- vpa.cell.pos.smpl.QC.pca.x %>%
bind_cols(
vpa.cell.pos.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(Samples, Group:Experiment)
)
row.names(vpa.cell.pos.smpl.QC.pca.x) <- vpa.cell.pos.smpl.QC.pca.x$Samples
vpa.cell.pos.smpl.QC.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (55.0% Var)") +
ylab("PC2 (15.0%)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA-only / Cells / Positive Mode")
vpa.cell.pos.smpl.QC.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (11.4% Var)") +
ylab("PC4 (5.0%)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA-only / Cells / Positive Mode")
### Experimental Samples Only ###
vpa.cell.pos.smpl.pca <- vpa.cell.pos.noNA %>%
filter(Group == "sample") %>%
select(starts_with("VPApC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vpa.cell.pos.smpl.pca$sdev ^ 2) * 100 / sum(vpa.cell.pos.smpl.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nExperimental samples only\nVPA-only / Cells / Positive Mode",
type = "b"
)
vpa.cell.pos.smpl.pca.x <- as.data.frame(vpa.cell.pos.smpl.pca$x)
vpa.cell.pos.smpl.pca.x <- vpa.cell.pos.smpl.pca.x %>%
bind_cols(
vpa.cell.pos.noNA %>%
filter(Group == "sample") %>%
select(Samples, Group:Experiment)
)
row.names(vpa.cell.pos.smpl.pca.x) <- vpa.cell.pos.smpl.pca.x$Samples
vpa.cell.pos.smpl.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (57.5% Var)") +
ylab("PC2 (19.6%)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA-only / Cells / Positive Mode")
vpa.cell.pos.smpl.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (7.2% Var)") +
ylab("PC4 (4.7%)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA-only / Cells / Positive Mode")
### PCA on all Samples ###
vpa.med.neg.full.pca <- vpa.med.neg.noNA %>%
select(starts_with("VPAnM")) %>%
# good idea to center data before pca, but scaling should not be necessary
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
# plot variance explained by each new principal component
plot(
(vpa.med.neg.full.pca$sdev ^ 2) * 100 / sum(vpa.med.neg.full.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nAll samples only\nVPA-only / Media / Negative Mode",
type = "b"
)
vpa.med.neg.full.pca.x <- as.data.frame(vpa.med.neg.full.pca$x)
row.names(vpa.med.neg.full.pca.x) <- vpa.med.neg.noNA$Samples
vpa.med.neg.full.pca.x <- vpa.med.neg.full.pca.x %>%
bind_cols(vpa.med.neg.noNA %>% select(Group:Experiment))
vpa.med.neg.full.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Group, shape = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (75.8% Var)") +
ylab("PC2 (8.4%)") +
ggtitle("Principal Component Analysis\nAll Samples\nVPA-only / Media / Negative Mode")
### Samples and QC ###
vpa.med.neg.smpl.QC.pca <- vpa.med.neg.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(starts_with("VPAnM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vpa.med.neg.smpl.QC.pca$sdev ^ 2) * 100 / sum(vpa.med.neg.smpl.QC.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nSamples and QC\nVPA-only / Media / Negative Mode",
type = "b"
)
vpa.med.neg.smpl.QC.pca.x <- as.data.frame(vpa.med.neg.smpl.QC.pca$x)
vpa.med.neg.smpl.QC.pca.x <- vpa.med.neg.smpl.QC.pca.x %>%
bind_cols(
vpa.med.neg.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(Samples, Group:Experiment)
)
row.names(vpa.med.neg.smpl.QC.pca.x) <- vpa.med.neg.smpl.QC.pca.x$Samples
vpa.med.neg.smpl.QC.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (56.8% Var)") +
ylab("PC2 (25.0%)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA-only / Media / Negative Mode")
vpa.med.neg.smpl.QC.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (9.0% Var)") +
ylab("PC4 (4.1%)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA-only / Media / Negative Mode")
### Experimental Samples Only ###
vpa.med.neg.smpl.pca <- vpa.med.neg.noNA %>%
filter(Group == "sample") %>%
select(starts_with("VPAnM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vpa.med.neg.smpl.pca$sdev ^ 2) * 100 / sum(vpa.med.neg.smpl.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nExperimental samples only\nVPA-only / Media / Negative Mode",
type = "b"
)
vpa.med.neg.smpl.pca.x <- as.data.frame(vpa.med.neg.smpl.pca$x)
vpa.med.neg.smpl.pca.x <- vpa.med.neg.smpl.pca.x %>%
bind_cols(
vpa.med.neg.noNA %>%
filter(Group == "sample") %>%
select(Samples, Group:Experiment)
)
row.names(vpa.med.neg.smpl.pca.x) <- vpa.med.neg.smpl.pca.x$Samples
vpa.med.neg.smpl.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (58.2% Var)") +
ylab("PC2 (27.5%)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA-only / Media / Negative Mode")
vpa.med.neg.smpl.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment, shape = Experiment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (8.3% Var)") +
ylab("PC4 (2.4%)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA-only / Media / Negative Mode")
### PCA on all Samples ###
vpa.med.pos.full.pca <- vpa.med.pos.noNA %>%
select(starts_with("VPApM")) %>%
# good idea to center data before pca, but scaling should not be necessary
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
# plot variance explained by each new principal component
plot(
(vpa.med.pos.full.pca$sdev ^ 2) * 100 / sum(vpa.med.pos.full.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nAll samples only\nVPA-only / Media / Positive Mode",
type = "b"
)
vpa.med.pos.full.pca.x <- as.data.frame(vpa.med.pos.full.pca$x)
row.names(vpa.med.pos.full.pca.x) <- vpa.med.pos.noNA$Samples
vpa.med.pos.full.pca.x <- vpa.med.pos.full.pca.x %>%
bind_cols(vpa.med.pos.noNA %>% select(Group:Experiment))
vpa.med.pos.full.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Group, shape = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (85.6% Var)") +
ylab("PC2 (6.3%)") +
ggtitle("Principal Component Analysis\nAll Samples\nVPA-only / Media / Positive Mode")
### Samples and QC ###
vpa.med.pos.smpl.QC.pca <- vpa.med.pos.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(starts_with("VPApM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vpa.med.pos.smpl.QC.pca$sdev ^ 2) * 100 / sum(vpa.med.pos.smpl.QC.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nSamples and QC\nVPA-only / Media / Positive Mode",
type = "b"
)
vpa.med.pos.smpl.QC.pca.x <- as.data.frame(vpa.med.pos.smpl.QC.pca$x)
vpa.med.pos.smpl.QC.pca.x <- vpa.med.pos.smpl.QC.pca.x %>%
bind_cols(
vpa.med.pos.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(Samples, Group:Experiment)
)
row.names(vpa.med.pos.smpl.QC.pca.x) <- vpa.med.pos.smpl.QC.pca.x$Samples
vpa.med.pos.smpl.QC.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (72.1% Var)") +
ylab("PC2 (15.6%)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA-only / Media / Positive Mode")
vpa.med.pos.smpl.QC.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (6.0% Var)") +
ylab("PC4 (2.5%)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA-only / Media / Positive Mode")
### Experimental Samples Only ###
vpa.med.pos.smpl.pca <- vpa.med.pos.noNA %>%
filter(Group == "sample") %>%
select(starts_with("VPApM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vpa.med.pos.smpl.pca$sdev ^ 2) * 100 / sum(vpa.med.pos.smpl.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nExperimental samples only\nVPA-only / Media / Positive Mode",
type = "b"
)
vpa.med.pos.smpl.pca.x <- as.data.frame(vpa.med.pos.smpl.pca$x)
vpa.med.pos.smpl.pca.x <- vpa.med.pos.smpl.pca.x %>%
bind_cols(
vpa.med.pos.noNA %>%
filter(Group == "sample") %>%
select(Samples, Group:Experiment)
)
row.names(vpa.med.pos.smpl.pca.x) <- vpa.med.pos.smpl.pca.x$Samples
vpa.med.pos.smpl.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (80.9% Var)") +
ylab("PC2 (11.2%)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA-only / Media / Positive Mode")
Is VPA metabolised by cells?
vpa.med.neg.noNA %>%
select(Samples:Experiment, VPAnM24) %>%
unite(Sample_Type, Group:Treatment, sep = "_") %>%
ggplot(aes(Sample_Type, VPAnM24)) +
geom_jitter(alpha = 0.8, width = 0.1)
vpa.med.neg.noNA %>%
select(Samples:Experiment, VPAnM24) %>%
unite(Sample_Type, Group:Treatment, sep = "_") %>%
ggplot(aes(Sample_Type, VPAnM24)) +
geom_boxplot()
vpa.med.neg.noNA %>%
select(Samples:Experiment, VPAnM24) %>%
filter(Treatment == "vpa") %>%
group_by(Group) %>%
summarize(
vpa.avg = mean(VPAnM24),
vpa.sd = sd(VPAnM24)
)
# A tibble: 2 x 3
Group vpa.avg vpa.sd
<chr> <dbl> <dbl>
1 empty 20.8 0.413
2 sample 20.8 0.506
It seems likely that VPA is not getting metabolised to a great extent, but cannot be sure.
# sample prep
vpa.cell.neg.smpl.data <- vpa.cell.neg.noNA %>%
filter(Group == "sample")
vpa.cell.neg.data.for.sva <- as.matrix(
vpa.cell.neg.smpl.data[, which(
colnames(vpa.cell.neg.smpl.data) == "VPAnC1"
):ncol(vpa.cell.neg.smpl.data)]
)
row.names(vpa.cell.neg.data.for.sva) <- vpa.cell.neg.smpl.data$Samples
vpa.cell.neg.data.for.sva <- t(vpa.cell.neg.data.for.sva)
# pheno prep
vpa.cell.neg.data.pheno <- as.data.frame(vpa.cell.neg.smpl.data[, 5:7])
row.names(vpa.cell.neg.data.pheno) <- vpa.cell.neg.smpl.data$Samples
# sva calculation
vpa.cell.neg.mod.vpa <- model.matrix(~ as.factor(Treatment), data = vpa.cell.neg.data.pheno)
vpa.cell.neg.mod0 <- model.matrix(~ 1, data = vpa.cell.neg.data.pheno)
set.seed(2018)
num.sv(vpa.cell.neg.data.for.sva, vpa.cell.neg.mod.vpa, method = "be")
[1] 3
set.seed(2018)
num.sv(vpa.cell.neg.data.for.sva, vpa.cell.neg.mod.vpa, method = "leek")
[1] 0
set.seed(2018)
vpa.cell.neg.sv <- sva(vpa.cell.neg.data.for.sva, vpa.cell.neg.mod.vpa, vpa.cell.neg.mod0)
Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5
# extract the surrogate variables
vpa.cell.neg.surr.var <- as.data.frame(vpa.cell.neg.sv$sv)
colnames(vpa.cell.neg.surr.var) <- c("S1", "S2", "S3")
vpa.cell.neg.covar <- vpa.cell.neg.smpl.pca.x %>%
select(Samples, PC1:PC4) %>%
inner_join(
cbind(vpa.cell.neg.data.pheno, vpa.cell.neg.surr.var) %>%
rownames_to_column("Samples"),
by = "Samples"
) %>%
full_join(vpa.cell.other.info, by = "Samples") %>%
as_tibble()
vpa.cell.neg.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
select(Samples, exp_plate:S3) %>%
gather("surr_var", "value", S1:S3) %>%
ggplot(aes(exp_plate, value, color = exp_plate)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1) +
facet_wrap(~ surr_var)
vpa.cell.neg.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
select(Samples, Treatment:S3) %>%
gather("surr_var", "value", S1:S3) %>%
ggplot(aes(exp_plate, value)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1, aes(color = Treatment)) +
facet_wrap(~ surr_var)
vpa.cell.neg.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
select(Samples:exp_plate) %>%
gather("PC", "value", PC1:PC4) %>%
ggplot(aes(exp_plate, value)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1, aes(color = Treatment)) +
facet_wrap(~ PC, scales = "free")
vpa.cell.neg.covar %>%
select(PC1:PC4, S1:S3) %>%
ggcorr(label = TRUE)
vpa.cell.neg.covar %>%
select(PC1:PC4, S1:S3) %>%
ggpairs()
vpa.cell.neg.covar %>%
ggplot(aes(S3)) +
geom_histogram(bins = 50)
vpa.cell.neg.covar %>%
select(PC1:PC4, prot_conc_ug_uL:run_order) %>%
ggcorr(label = TRUE)
vpa.cell.neg.covar %>%
select(PC1:PC4, prot_conc_ug_uL:run_order) %>%
ggpairs()
vpa.cell.neg.covar %>%
ggplot(aes(run_order, PC2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.neg.covar %>%
ggplot(aes(prot_conc_ug_uL, PC2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.neg.covar %>%
select(S1:run_order) %>%
ggcorr(label = TRUE)
vpa.cell.neg.covar %>%
select(S1:run_order) %>%
ggpairs()
vpa.cell.neg.covar %>%
ggplot(aes(run_order, S1, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.neg.covar %>%
ggplot(aes(run_order, S2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.neg.covar %>%
ggplot(aes(prot_conc_ug_uL, S2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.neg.covar %>%
ggplot(aes(run_order, prot_conc_ug_uL, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
colnames(vpa.cell.neg.mod.vpa) <- c("cntrl", "DRUGvsCNTRL")
# combine the full model matrix and the surrogate variables into one
vpa.cell.neg.d.sv <- cbind(vpa.cell.neg.mod.vpa, vpa.cell.neg.surr.var[, 1:2])
head(vpa.cell.neg.d.sv)
cntrl DRUGvsCNTRL S1 S2
T1_P1_C1 1 0 -0.02572670 0.30588286
T1_P1_C2 1 0 0.22693416 -0.04248658
T1_P1_C3 1 0 0.24683594 0.01195802
T1_P1_V1 1 1 0.26304093 0.05597521
T1_P1_V2 1 1 -0.08024336 0.19758531
T1_P1_V3 1 1 -0.08965598 0.26184963
vpa.cell.neg.top.table <- vpa.cell.neg.data.for.sva %>%
# fit a linear model
lmFit(vpa.cell.neg.d.sv) %>%
# calculate the test statistics
eBayes() %>%
# select the top features that have a p-value < 0.05 after Bonferroni multiple hypothesis correction
topTable(coef = "DRUGvsCNTRL", adjust = "bonferroni", p.value = 0.05, n = nrow(vpa.cell.neg.data.for.sva))
# what the result looks like:
head(vpa.cell.neg.top.table)
logFC AveExpr t P.Value adj.P.Val
VPAnC119 -1.1117931 16.34932 -16.036945 2.285612e-13 4.091246e-11
VPAnC114 0.9324087 15.08073 11.000854 3.019527e-10 5.404953e-08
VPAnC152 -1.3070183 16.93098 -10.520550 6.782678e-10 1.214099e-07
VPAnC148 -0.6677076 14.87538 -7.893929 9.235897e-08 1.653226e-05
VPAnC18 -0.6065030 21.20349 -7.811684 1.092767e-07 1.956052e-05
VPAnC44 0.5297000 22.56006 7.496710 2.098579e-07 3.756456e-05
B
VPAnC119 20.769050
VPAnC114 13.612185
VPAnC152 12.796738
VPAnC148 7.826527
VPAnC18 7.656136
VPAnC44 6.995121
# make result more informative
vpa.cell.neg.top.w.info <- vpa.cell.neg.top.table %>%
rownames_to_column("compound_short") %>%
mutate(
vpa_div_cntrl = 2 ^ logFC,
change_in_vpa = ifelse(vpa_div_cntrl < 1, "down", "up")
) %>%
inner_join(vpa.cell.neg.compound.info, by = "compound_short") %>%
filter(vpa_div_cntrl > 1.2 | vpa_div_cntrl < 0.83) %>%
arrange(change_in_vpa, desc(vpa_div_cntrl))
vpa.cell.neg.top.w.info %>%
select(compound_short, compound_full, change_in_vpa, vpa_div_cntrl)
compound_short compound_full change_in_vpa
1 VPAnC58 N-Acetylserine down
2 VPAnC65 Xanthine down
3 VPAnC97 myo-Inositol down
4 VPAnC105 N-alpha-Acetylglutamine down
5 VPAnC41 Asparagine down
6 VPAnC23 Thiosulfate down
7 VPAnC46 Deoxyribose down
8 VPAnC135 Glycerol 1-phosphoserine down
9 VPAnC86 G3P down
10 VPAnC18 Glyceric Acid down
11 VPAnC148 Sedoheptulose 7-phosphate down
12 VPAnC136 D-Glucose 6-phosphate down
13 VPAnC172 Succinoadenosine down
14 VPAnC144 Xanthosine down
15 VPAnC119 Glycerylphosphorylethanolamine down
16 VPAnC10 Lactic Acid down
17 VPAnC176 dTDP down
18 VPAnC159 CMP down
19 VPAnC127 Ribose 5-Phosphate down
20 VPAnC152 GlcNAc-1P down
21 VPAnC48 Adenine down
22 VPAnC160 UMP down
23 VPAnC138 Inosine down
24 VPAnC168 AMP down
25 VPAnC169 GMP down
26 VPAnC114 Homocitric Acid up
27 VPAnC54 Valproic acid up
28 VPAnC200 ADP-Glucose up
29 VPAnC53 O-Phosphoethanolamine up
30 VPAnC157 11Z,14Z-Eicosadienoic Acid (20:2 n-6) up
31 VPAnC189 CTP up
32 VPAnC194 GTP up
33 VPAnC67 3-Sulfinoalanine up
34 VPAnC193 ATP up
35 VPAnC73 2-Aminoadipic Acid up
36 VPAnC44 Aspartic Acid up
37 VPAnC124 Cystathionine up
38 VPAnC190 UTP up
39 VPAnC17 Serine up
40 VPAnC30 Threonine up
41 VPAnC2 Glycine up
vpa_div_cntrl
1 0.8006513
2 0.7733022
3 0.7727024
4 0.7642022
5 0.7475017
6 0.7258479
7 0.7200514
8 0.6898581
9 0.6742282
10 0.6567868
11 0.6295061
12 0.6155587
13 0.5263833
14 0.4899408
15 0.4627186
16 0.4458119
17 0.4260245
18 0.4086860
19 0.4066015
20 0.4041553
21 0.3971414
22 0.3471526
23 0.3070139
24 0.1705093
25 0.1357460
26 1.9084597
27 1.8537394
28 1.5939533
29 1.5623231
30 1.5330869
31 1.5075878
32 1.4845507
33 1.4760588
34 1.4597931
35 1.4558662
36 1.4436289
37 1.4029416
38 1.3951522
39 1.2483656
40 1.2386250
41 1.2121890
#write_csv(path = "./results/vpa_cell_neg_top_hits_w_FC_pval_sv_mod.csv", vpa.cell.neg.top.w.info)
vpa.cell.neg.gathered <- vpa.cell.neg.noNA %>%
filter(Group == "sample") %>%
bind_cols(vpa.cell.neg.surr.var) %>%
select(Samples, Treatment, S1:S2, starts_with("VPAnC")) %>%
gather(key = "Compound", value = "Abundance", VPAnC1:VPAnC99)
# structure so far:
glimpse(vpa.cell.neg.gathered)
Observations: 4,117
Variables: 6
$ Samples <chr> "T1_P1_C1", "T1_P1_C2", "T1_P1_C3", "T1_P1_V1", "T1_...
$ Treatment <chr> "cntrl", "cntrl", "cntrl", "vpa", "vpa", "vpa", "cnt...
$ S1 <dbl> -0.02572670, 0.22693416, 0.24683594, 0.26304093, -0....
$ S2 <dbl> 0.30588286, -0.04248658, 0.01195802, 0.05597521, 0.1...
$ Compound <chr> "VPAnC1", "VPAnC1", "VPAnC1", "VPAnC1", "VPAnC1", "V...
$ Abundance <dbl> 15.08795, 14.67335, 14.81717, 14.55139, 14.79476, 15...
vpa.cell.neg.nested <- vpa.cell.neg.gathered %>%
group_by(Compound) %>%
nest() %>%
# apply a linear model to each individual compound, as a function of the surrogate variables
mutate(model = map(data, ~lm(Abundance ~ S1 + S2, data = .))) %>%
# use broom to tidy up the output
mutate(augment_model = map(model, augment))
# result to far:
vpa.cell.neg.nested
# A tibble: 179 x 4
Compound data model augment_model
<chr> <list> <list> <list>
1 VPAnC1 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
2 VPAnC10 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
3 VPAnC100 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
4 VPAnC101 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
5 VPAnC102 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
6 VPAnC103 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
7 VPAnC104 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
8 VPAnC105 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
9 VPAnC106 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
10 VPAnC107 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
# ... with 169 more rows
# now to get the residuals out for each compound
vpa.cell.neg.modSV.resid <- vpa.cell.neg.nested %>%
unnest(data, augment_model) %>%
select(Samples, Treatment, Compound, .resid) %>%
# return to long format
spread(Compound, .resid)
glimpse(vpa.cell.neg.modSV.resid[, 1:5])
Observations: 23
Variables: 5
$ Samples <chr> "T1_P1_C1", "T1_P1_C2", "T1_P1_C3", "T1_P1_V1", "T1_...
$ Treatment <chr> "cntrl", "cntrl", "cntrl", "vpa", "vpa", "vpa", "cnt...
$ VPAnC1 <dbl> 0.09809226, -0.06724331, 0.04638420, -0.24379320, -0...
$ VPAnC10 <dbl> 0.47952620, 0.79800463, 0.31534236, -1.11205044, -0....
$ VPAnC100 <dbl> 0.064964248, -0.108617277, 0.101704046, 0.098637074,...
vpa.cell.neg.modSV.resid %>%
select(Samples, one_of(vpa.cell.neg.top.w.info$compound_short)) %>%
HeatmapPrepAlt("VPAnC") %>%
t() %>%
heatmaply(
colors = viridis(n = 10, option = "magma"),
xlab = "Samples", ylab = "Compounds",
main = "Statistically significant compounds\nVPA-Only Experiment / Cells / Negative Mode",
margins = c(50, 50, 75, 30),
labRow = vpa.cell.neg.top.w.info$compound_full,
k_col = 2, k_row = 2
)
test <- vpa.cell.neg.modSV.resid %>%
group_by(Treatment) %>%
summarize_at(vars(VPAnC1:VPAnC99), mean) %>%
gather("compound", "avg", -Treatment) %>%
spread(Treatment, avg) %>%
mutate(test.sub = vpa - cntrl, test.div = vpa / cntrl)
test
## # A tibble: 179 x 5
## compound cntrl vpa test.sub test.div
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 VPAnC1 0.117 -0.107 -0.224 -0.917
## 2 VPAnC10 0.577 -0.529 -1.11 -0.917
## 3 VPAnC100 0.0345 -0.0316 -0.0661 -0.917
## 4 VPAnC101 0.327 -0.299 -0.626 -0.917
## 5 VPAnC102 -0.125 0.114 0.239 -0.917
## 6 VPAnC103 0.190 -0.174 -0.364 -0.917
## 7 VPAnC104 0.0708 -0.0649 -0.136 -0.917
## 8 VPAnC105 0.192 -0.176 -0.368 -0.917
## 9 VPAnC106 -0.140 0.128 0.269 -0.917
## 10 VPAnC107 -0.0141 0.0129 0.0270 -0.917
## # ... with 169 more rows
test %>%
inner_join(vpa.cell.neg.top.w.info, by = c("compound" = "compound_short")) %>%
ggplot(aes(test.sub, logFC)) +
geom_point(size = 2, alpha = 0.6) +
geom_abline(intercept = 0, slope = 1)
vpa.cell.neg.C1.model <- vpa.cell.neg.noNA %>%
filter(Group == "sample") %>%
bind_cols(vpa.cell.neg.surr.var) %>%
lm(VPAnC1 ~ S1 + S2, data = .)
summary(vpa.cell.neg.C1.model)
##
## Call:
## lm(formula = VPAnC1 ~ S1 + S2, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41211 -0.10953 0.04638 0.11134 0.36551
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.79993 0.04125 358.754 < 2e-16 ***
## S1 -0.14752 0.19785 -0.746 0.46456
## S2 0.60854 0.19785 3.076 0.00596 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1978 on 20 degrees of freedom
## Multiple R-squared: 0.3337, Adjusted R-squared: 0.2671
## F-statistic: 5.008 on 2 and 20 DF, p-value: 0.01725
test2 <- test %>%
inner_join(vpa.cell.neg.top.w.info, by = c("compound" = "compound_short")) %>%
inner_join(
vpa.cell.neg.data.for.sva %>%
lmFit(vpa.cell.neg.mod.vpa) %>%
eBayes() %>%
topTable(coef = "DRUGvsCNTRL", adjust = "bonferroni", p.value = 0.05, n = nrow(vpa.cell.neg.data.for.sva)) %>%
rownames_to_column("compound_short") %>%
mutate(
vpa_div_cntrl = 2 ^ logFC,
change_in_vpa = ifelse(vpa_div_cntrl < 1, "down", "up")
) %>%
inner_join(vpa.cell.neg.compound.info, by = "compound_short") %>%
filter(vpa_div_cntrl > 1.2 | vpa_div_cntrl < 0.83) %>%
arrange(change_in_vpa, desc(vpa_div_cntrl)),
by = c("compound" = "compound_short"),
suffix = c("_w_surr", "_no_surr")
)
test2 %>%
ggplot(aes(logFC_no_surr, logFC_w_surr)) +
geom_point(size = 2, alpha = 0.8) +
geom_abline(intercept = 0, slope = 1)
vpa.cell.neg.model.tidy <- vpa.cell.neg.gathered %>%
group_by(Compound) %>%
nest() %>%
mutate(model = map(data, ~lm(Abundance ~ S1 + S2, data = .))) %>%
mutate(tidy_model = map(model, tidy))
vpa.cell.neg.model.tidy
## # A tibble: 179 x 4
## Compound data model tidy_model
## <chr> <list> <list> <list>
## 1 VPAnC1 <tibble [23 x 5]> <S3: lm> <tibble [3 x 5]>
## 2 VPAnC10 <tibble [23 x 5]> <S3: lm> <tibble [3 x 5]>
## 3 VPAnC100 <tibble [23 x 5]> <S3: lm> <tibble [3 x 5]>
## 4 VPAnC101 <tibble [23 x 5]> <S3: lm> <tibble [3 x 5]>
## 5 VPAnC102 <tibble [23 x 5]> <S3: lm> <tibble [3 x 5]>
## 6 VPAnC103 <tibble [23 x 5]> <S3: lm> <tibble [3 x 5]>
## 7 VPAnC104 <tibble [23 x 5]> <S3: lm> <tibble [3 x 5]>
## 8 VPAnC105 <tibble [23 x 5]> <S3: lm> <tibble [3 x 5]>
## 9 VPAnC106 <tibble [23 x 5]> <S3: lm> <tibble [3 x 5]>
## 10 VPAnC107 <tibble [23 x 5]> <S3: lm> <tibble [3 x 5]>
## # ... with 169 more rows
# now to get the residuals out for each compound
vpa.cell.neg.surr.var.model.tidy <- vpa.cell.neg.model.tidy %>%
unnest(tidy_model) %>%
filter(term != "(Intercept)")
vpa.cell.neg.surr.var.model.tidy %>%
ggplot(aes(p.value, fill = term)) +
geom_histogram(bins = 50) +
facet_wrap(~ term) +
theme_minimal() +
geom_vline(xintercept = 0.05, alpha = 0.5, size = 1, ) +
theme(legend.position = "none")
ncol(vpa.cell.neg.noNA %>% select(starts_with("VPAnC")))
## [1] 179
vpa.cell.neg.surr.var.model.tidy %>%
filter(p.value < 0.05) %>%
group_by(term) %>%
count()
## # A tibble: 2 x 2
## # Groups: term [2]
## term n
## <chr> <int>
## 1 S1 37
## 2 S2 117
vpa.cell.neg.surr.var.model.tidy %>%
filter(p.value < 0.05 & term == "S1") %>%
inner_join(vpa.cell.neg.compound.info, by = c("Compound" = "compound_short"))
## # A tibble: 37 x 11
## Compound term estimate std.error statistic p.value compound_full
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 VPAnC104 S1 1.33 0.436 3.05 6.39e- 3 2-Phosphogly~
## 2 VPAnC113 S1 -0.386 0.107 -3.62 1.71e- 3 Tryptophan
## 3 VPAnC12 S1 10.6 0.251 42.4 4.70e-21 2-Aminobutyr~
## 4 VPAnC13 S1 10.6 0.252 42.2 5.16e-21 BAIBA
## 5 VPAnC132 S1 0.317 0.127 2.50 2.12e- 2 Neopterin
## 6 VPAnC139 S1 0.813 0.383 2.12 4.63e- 2 Estradiol-17~
## 7 VPAnC147 S1 0.431 0.152 2.83 1.03e- 2 Ophthalmic A~
## 8 VPAnC148 S1 0.940 0.398 2.36 2.83e- 2 Sedoheptulos~
## 9 VPAnC167 S1 3.60 0.769 4.68 1.45e- 4 D-Fructose 1~
## 10 VPAnC177 S1 0.689 0.283 2.43 2.45e- 2 CDP
## # ... with 27 more rows, and 4 more variables: formula <chr>, mass <dbl>,
## # rt <dbl>, cas_id <chr>
vpa.cell.neg.noNA %>%
filter(Group == "sample") %>%
bind_cols(vpa.cell.neg.surr.var) %>%
select(Samples, Treatment, S1:S2, VPAnC189, VPAnC190, VPAnC192:VPAnC194) %>%
gather("surr_var", "surr_var_val", S1:S2) %>%
gather("metabolite", "abundance", VPAnC189:VPAnC194) %>%
ggplot(aes(surr_var_val, abundance, color = surr_var)) +
geom_point(size = 2, alpha = 0.8) +
facet_grid(surr_var ~ metabolite, scales = "free") +
theme_minimal()
vpa.cell.neg.noNA %>%
filter(Group == "sample") %>%
bind_cols(vpa.cell.neg.surr.var) %>%
select(Samples, Treatment, S1:S2) %>%
gather("surr_var", "value", S1:S2) %>%
ggplot(aes(Treatment, value, color = surr_var)) +
geom_jitter(width = 0.1) +
facet_wrap(~ surr_var) +
theme_minimal()
### PCA - cleaned data ###
vpa.cell.neg.modSV.pca <- vpa.cell.neg.modSV.resid %>%
select(starts_with("VPAnC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(vpa.cell.neg.modSV.pca$sdev^ 2 * 100 / sum(vpa.cell.neg.modSV.pca$sdev ^ 2))
vpa.cell.neg.modSV.pca.x <- as.data.frame(vpa.cell.neg.modSV.pca$x)
row.names(vpa.cell.neg.modSV.pca.x) <- vpa.cell.neg.modSV.resid$Samples
vpa.cell.neg.modSV.pca.x <- vpa.cell.neg.modSV.pca.x %>%
bind_cols(vpa.cell.neg.modSV.resid %>% select(Treatment))
vpa.cell.neg.modSV.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (54.1% Var)") +
ylab("PC2 (12.9% Var)")
vpa.cell.neg.modSV.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (8.4% Var)") +
ylab("PC4 (6.1% Var)")
vpa.med.metab.plot <- vpa.med.neg.noNA %>%
filter(Group != "QC") %>%
select(Samples:Experiment, VPAnM24) %>%
unite(Sample_Type, Group:Treatment, sep = "_") %>%
ggplot(aes(Sample_Type, VPAnM24, color = Sample_Type, fill = Sample_Type)) +
geom_boxplot(alpha = 0.3) +
scale_fill_manual(
values = c("#009E73", "#CC79A7", "#56B4E9", "#E69F00", "#999999"),
labels = c("Empty\nCNTRL", "Empty\nVPA", "Exp Sample\nCNTRL", "Exp Sample\nVPA", "Solv")
) +
scale_color_manual(
values = c("#009E73", "#CC79A7", "#56B4E9", "#E69F00", "#999999"),
labels = c("Empty\nCNTRL", "Empty\nVPA", "Exp Sample\nCNTRL", "Exp Sample\nVPA", "Solv")
) +
scale_x_discrete(labels = c("Empty\nCNTRL", "Empty\nVPA", "Exp Sample\nCNTRL", "Exp Sample\nVPA", "Solv")) +
#geom_jitter(alpha = 0.8, width = 0.25, size = 2, color = "black") +
geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.75) +
theme_minimal() +
ylab("log2(VPA Abundance)") +
xlab("") +
theme(
legend.position = "none",
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 10)
) +
ylim(14, 22)
vpa.med.neg.noNA %>%
filter(Treatment == "vpa") %>%
select(Samples:Experiment, VPAnM24) %>%
lm(VPAnM24 ~ Group, data = .) %>%
summary()
Call:
lm(formula = VPAnM24 ~ Group, data = .)
Residuals:
Min 1Q Median 3Q Max
-1.0244 -0.2042 0.1134 0.3667 0.5863
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 20.84908 0.28435 73.321 <2e-16 ***
Groupsample -0.02243 0.31792 -0.071 0.945
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4925 on 13 degrees of freedom
Multiple R-squared: 0.0003829, Adjusted R-squared: -0.07651
F-statistic: 0.00498 on 1 and 13 DF, p-value: 0.9448
# no statistically significant difference between Empty / VPA and Exp Sample / VPA
vpa.cell.metab.plot <- vpa.cell.neg.noNA %>%
filter(Group != "QC") %>%
select(Samples:Experiment, VPAnC54) %>%
unite(Sample_Type, Group:Treatment, sep = "_") %>%
ggplot(aes(Sample_Type, VPAnC54, color = Sample_Type, fill = Sample_Type)) +
geom_boxplot(alpha = 0.3) +
scale_fill_manual(
values = c("#009E73", "#CC79A7", "#56B4E9", "#E69F00", "#999999"),
labels = c("Empty\nCNTRL", "Empty\nVPA", "Exp Sample\nCNTRL", "Exp Sample\nVPA", "Solv")
) +
scale_color_manual(
values = c("#009E73", "#CC79A7", "#56B4E9", "#E69F00", "#999999"),
labels = c("Empty\nCNTRL", "Empty\nVPA", "Exp Sample\nCNTRL", "Exp Sample\nVPA", "Solv")
) +
scale_x_discrete(labels = c("Empty\nCNTRL", "Empty\nVPA", "Exp Sample\nCNTRL", "Exp Sample\nVPA", "Solv")) +
#geom_jitter(alpha = 0.8, width = 0.25, size = 2, color = "black") +
geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.75) +
theme_minimal() +
ylab("log2(VPA Abundance)") +
xlab("") +
theme(
legend.position = "none",
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 10)
) +
ylim(14, 22)
vpa.cell.resid.metab.plot <- vpa.cell.neg.modSV.resid %>%
select(Samples:Treatment, VPAnC54) %>%
ggplot(aes(Treatment, VPAnC54, color = Treatment, fill = Treatment)) +
geom_boxplot(alpha = 0.3) +
scale_fill_manual(
values = c("#56B4E9", "#E69F00"),
labels = c("Exp Sample\nCNTRL", "Exp Sample\nVPA")
) +
scale_color_manual(
values = c("#56B4E9", "#E69F00"),
labels = c("Exp Sample\nCNTRL", "Exp Sample\nVPA")
) +
scale_x_discrete(labels = c("Exp Sample\nCNTRL", "Exp Sample\nVPA")) +
#geom_jitter(alpha = 0.8, width = 0.25, size = 2, color = "black") +
geom_dotplot(binaxis = "y", stackdir = "center") +
theme_minimal() +
ylab("log2(VPA Abundance)") +
xlab("") +
theme(
legend.position = "none",
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 10)
) +
ylim(-1.5, 1.5)
vpa.metab.cell.neg.surr.var.reg.plot <- vpa.cell.neg.noNA %>%
filter(Group == "sample") %>%
bind_cols(vpa.cell.neg.surr.var) %>%
select(Samples, Treatment, S1:S2, VPAnC54) %>%
gather("surr_var", "value", S1:S2) %>%
ggplot(aes(value, VPAnC54, color = surr_var)) +
geom_smooth(method = "lm") +
geom_point(size = 3, alpha = 0.8) +
facet_wrap(~surr_var) +
scale_color_manual(values = c("#90353b", "#55752f")) +
theme_minimal() +
theme(legend.position = "none")
vpa.cell.neg.noNA %>%
filter(Group == "sample") %>%
bind_cols(vpa.cell.neg.surr.var) %>%
select(Samples, Treatment, S1:S2, VPAnC54) %>%
lm(VPAnC54 ~ S1 + S2, data = .) %>%
summary()
Call:
lm(formula = VPAnC54 ~ S1 + S2, data = .)
Residuals:
Min 1Q Median 3Q Max
-0.9072 -0.4789 -0.0543 0.3233 1.2961
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 18.436864 0.128279 143.725 <2e-16 ***
S1 -0.006711 0.615205 -0.011 0.991
S2 0.687606 0.615205 1.118 0.277
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.6152 on 20 degrees of freedom
Multiple R-squared: 0.05879, Adjusted R-squared: -0.03533
F-statistic: 0.6247 on 2 and 20 DF, p-value: 0.5456
vpa.cell.neg.noNA %>%
filter(Group == "sample") %>%
bind_cols(vpa.cell.neg.surr.var) %>%
select(Samples, Treatment, S1:S2, VPAnC54) %>%
ggplot(aes(S1, S2, color = VPAnC54)) +
geom_point(size = 3) +
scale_color_viridis_c(option = "C") +
theme_minimal() +
xlim(-0.6, 0.4) +
ylim(-0.6, 0.4)
vpa.med.metab.plot
`stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
vpa.cell.metab.plot
`stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
vpa.metab.cell.neg.surr.var.reg.plot
vpa.cell.resid.metab.plot
`stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
vpa.metabolism.grid.plot <- plot_grid(
plot_grid(
vpa.med.metab.plot, vpa.cell.metab.plot,
ncol = 2,
labels = c("A", "B")
),
plot_grid(
vpa.metab.cell.neg.surr.var.reg.plot, vpa.cell.resid.metab.plot,
ncol = 2,
labels = c("C", "D"),
rel_widths = c(1, 0.5)
),
ncol = 1,
rel_heights = c(1.5, 1)
)
`stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
#save_plot("./results/vpa_metab_plot.png", vpa.metabolism.grid.plot, base_height = 8, base_width = 8)
library(ggcorrplot)
vpa.cell.full.pca.plot <- vpa.cell.neg.full.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Group)) +
geom_point(size = 3, alpha = 0.8) +
xlab("PC1 (90.5% Var)") +
ylab("PC2 (3.1%)") +
theme_minimal() +
scale_color_brewer(palette = "Dark2", labels = c("Empty", "QC", "Exp Smpl", "Solv")) +
theme(legend.title = element_blank())
vpa.cell.smpl.QC.pca.plot <- vpa.cell.neg.smpl.QC.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 3, alpha = 0.8) +
xlab("PC1 (35.1% Var)") +
ylab("PC2 (19.7%)") +
scale_color_manual(values = c("#56B4E9", "#000000", "#E69F00"), labels = c("Control", "QC", "VPA")) +
theme_minimal() +
theme(legend.title = element_blank())
vpa.cell.neg.smpl.pca.plot <- vpa.cell.neg.smpl.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 3, alpha = 0.8) +
xlab("PC1 (34.3% Var)") +
ylab("PC2 (23.8%)") +
scale_color_manual(values = c("#56B4E9", "#E69F00"), labels = c("Control", "VPA")) +
theme_minimal() +
theme(legend.title = element_blank())
vpa.cell.neg.first.pca.grid <- plot_grid(
vpa.cell.full.pca.plot, vpa.cell.smpl.QC.pca.plot, vpa.cell.neg.smpl.pca.plot,
labels = c("A", "B", "C"),
ncol = 3
)
vpa.cell.neg.run.order.facet <- vpa.cell.neg.covar %>%
select(Samples, Treatment:Experiment, run_order, PC2, S1:S2) %>%
gather("measure", "value", PC2:S2) %>%
ggplot(aes(run_order, value, color = measure)) +
geom_smooth(method = "lm", se = FALSE) +
geom_point(size = 3, alpha = 0.8, aes(shape = Experiment)) +
facet_wrap(~ measure, ncol = 3, scales = "free") +
theme_minimal() +
scale_color_stata()
vpa.cell.neg.prot.conc.facet <- vpa.cell.neg.covar %>%
select(Samples, Treatment:Experiment, prot_conc_ug_uL, PC2, S1:S2) %>%
gather("measure", "value", PC2:S2) %>%
ggplot(aes(prot_conc_ug_uL, value, color = measure)) +
geom_smooth(method = "lm", se = FALSE) +
geom_point(size = 3, alpha = 0.8, aes(shape = Experiment)) +
facet_wrap(~ measure, ncol = 3, scales = "free") +
theme_minimal() +
scale_color_stata()
vpa.cell.neg.cor.and.resid.pca.plot <- plot_grid(
vpa.cell.neg.covar %>%
select(PC1:PC4, S1:S2, prot_conc_ug_uL:run_order) %>%
rename("Prot Conc" = prot_conc_ug_uL, "Run Order" = run_order) %>%
cor() %>%
round(1) %>%
ggcorrplot(
ggtheme = theme_minimal,
type = "lower",
lab = TRUE,
lab_size = 3,
outline.color = "white",
show.diag = TRUE,
colors = c("#67a9cf", "#f7f7f7", "#ef8a62")
),
vpa.cell.neg.modSV.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (54.1% Var)") +
ylab("PC2 (12.9% Var)") +
scale_color_manual(values = c("#56B4E9", "#E69F00"), labels = c("Control", "VPA")) +
theme_minimal() +
theme(legend.title = element_blank()),
ncol = 2,
labels = c("F", "G")
)
vpa.cell.neg.surr.var.pca.full.grid.plot <- plot_grid(
vpa.cell.neg.first.pca.grid,
vpa.cell.neg.run.order.facet,
vpa.cell.neg.prot.conc.facet,
vpa.cell.neg.cor.and.resid.pca.plot,
labels = c("", "D", "E", ""),
ncol = 1,
rel_heights = c(0.75, 1, 1, 1.5)
)
#save_plot("./results/vpa_cell_neg_sva_pca_full_grid_fig.png", vpa.cell.neg.surr.var.pca.full.grid.plot, base_height = 10, base_width = 8)
vpa.cell.pos.smpl.data <- vpa.cell.pos.noNA %>%
filter(Group == "sample")
vpa.cell.pos.data.for.sva <- as.matrix(
vpa.cell.pos.smpl.data[, which(
colnames(vpa.cell.pos.smpl.data) == "VPApC1"
):ncol(vpa.cell.pos.smpl.data)]
)
row.names(vpa.cell.pos.data.for.sva) <- vpa.cell.pos.smpl.data$Samples
vpa.cell.pos.data.for.sva <- t(vpa.cell.pos.data.for.sva)
vpa.cell.pos.data.pheno <- as.data.frame(vpa.cell.pos.smpl.data[, 5:7])
row.names(vpa.cell.pos.data.pheno) <- vpa.cell.pos.smpl.data$Samples
# sva calculation
vpa.cell.pos.mod.vpa <- model.matrix(~ as.factor(Treatment), data = vpa.cell.pos.data.pheno)
vpa.cell.pos.mod0 <- model.matrix(~ 1, data = vpa.cell.pos.data.pheno)
set.seed(2018)
num.sv(vpa.cell.pos.data.for.sva, vpa.cell.pos.mod.vpa, method = "be")
[1] 2
set.seed(2018)
num.sv(vpa.cell.pos.data.for.sva, vpa.cell.pos.mod.vpa, method = "leek")
[1] 2
set.seed(2018)
vpa.cell.pos.sv <- sva(vpa.cell.pos.data.for.sva, vpa.cell.pos.mod.vpa, vpa.cell.pos.mod0)
Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
vpa.cell.pos.surr.var <- as.data.frame(vpa.cell.pos.sv$sv)
colnames(vpa.cell.pos.surr.var) <- c("S1", "S2")
vpa.cell.pos.covar <- vpa.cell.pos.smpl.pca.x %>%
select(Samples, PC1:PC4) %>%
inner_join(
cbind(vpa.cell.pos.data.pheno, vpa.cell.pos.surr.var) %>%
rownames_to_column("Samples"),
by = "Samples"
) %>%
full_join(vpa.cell.other.info, by = "Samples") %>%
as_tibble()
vpa.cell.pos.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
select(Samples, exp_plate:S2) %>%
gather("surr_var", "value", S1:S2) %>%
ggplot(aes(exp_plate, value, color = exp_plate)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1) +
facet_wrap(~ surr_var)
vpa.cell.pos.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
select(Samples, Treatment:S2) %>%
gather("surr_var", "value", S1:S2) %>%
ggplot(aes(exp_plate, value)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1, aes(color = Treatment)) +
facet_wrap(~ surr_var)
vpa.cell.pos.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
select(Samples:exp_plate) %>%
gather("PC", "value", PC1:PC4) %>%
ggplot(aes(exp_plate, value)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1, aes(color = Treatment)) +
facet_wrap(~ PC, scales = "free")
vpa.cell.pos.covar %>%
select(PC1:PC4, S1:S2) %>%
ggcorr(label = TRUE)
vpa.cell.pos.covar %>%
select(PC1:PC4, S1:S2) %>%
ggpairs()
vpa.cell.pos.covar %>%
select(PC1:PC4, prot_conc_ug_uL:run_order) %>%
ggcorr(label = TRUE)
vpa.cell.pos.covar %>%
select(PC1:PC4, prot_conc_ug_uL:run_order) %>%
ggpairs()
vpa.cell.pos.covar %>%
ggplot(aes(run_order, PC2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.pos.covar %>%
ggplot(aes(prot_conc_ug_uL, PC1, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.pos.covar %>%
ggplot(aes(prot_conc_ug_uL, PC2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.pos.covar %>%
select(S1:run_order) %>%
ggcorr(label = TRUE)
vpa.cell.pos.covar %>%
select(S1:run_order) %>%
ggpairs()
vpa.cell.pos.covar %>%
ggplot(aes(run_order, S1, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.pos.covar %>%
ggplot(aes(run_order, S2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.pos.covar %>%
ggplot(aes(prot_conc_ug_uL, S1, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.pos.covar %>%
ggplot(aes(prot_conc_ug_uL, S2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
colnames(vpa.cell.pos.mod.vpa) <- c("cntrl", "DRUGvsCNTRL")
vpa.cell.pos.d.sv <- cbind(vpa.cell.pos.mod.vpa, vpa.cell.pos.surr.var)
head(vpa.cell.pos.d.sv)
cntrl DRUGvsCNTRL S1 S2
T1_P1_C1 1 0 -0.3187310 0.19917816
T1_P1_C2 1 0 -0.2279608 -0.06387899
T1_P1_C3 1 0 -0.1409242 0.01021199
T1_P1_V1 1 1 -0.2796968 -0.36213773
T1_P1_V2 1 1 -0.1999223 -0.13213535
T1_P1_V3 1 1 -0.2955297 0.16841744
vpa.cell.pos.top.table <- vpa.cell.pos.data.for.sva %>%
lmFit(vpa.cell.pos.d.sv) %>%
eBayes() %>%
topTable(coef = "DRUGvsCNTRL", adjust = "bonferroni", p.value = 0.05, n = nrow(vpa.cell.pos.data.for.sva))
vpa.cell.pos.top.w.info <- vpa.cell.pos.top.table %>%
rownames_to_column("compound_short") %>%
mutate(
vpa_div_cntrl = 2 ^ logFC,
change_in_vpa = ifelse(vpa_div_cntrl < 1, "down", "up")
) %>%
inner_join(vpa.cell.pos.compound.info, by = "compound_short") %>%
filter(vpa_div_cntrl > 1.2 | vpa_div_cntrl < 0.83) %>%
arrange(change_in_vpa, desc(vpa_div_cntrl))
vpa.cell.pos.top.w.info %>%
select(compound_short, compound_full, change_in_vpa, vpa_div_cntrl)
compound_short compound_full change_in_vpa
1 VPApC35 Asparagine down
2 VPApC40 N-Methylnicotinate down
3 VPApC51 Xanthine down
4 VPApC101 Glycerol 1-phosphoserine down
5 VPApC24 Nicotinamide down
6 VPApC49 D-Ribose down
7 VPApC89 GlcNAc down
8 VPApC67 G3P down
9 VPApC112 Xanthosine down
10 VPApC38 Adenine down
11 VPApC39 Hypoxanthine down
12 VPApC119 CMP down
13 VPApC14 Uracil down
14 VPApC134 ADP down
15 VPApC107 Inosine down
16 VPApC85 Glycerylphosphorylethanolamine down
17 VPApC60 Fucose down
18 VPApC95 Uridine down
19 VPApC74 D-Galactitol down
20 VPApC120 UMP down
21 VPApC121 NMN down
22 VPApC100 Glycerol-3-phosphocholine down
23 VPApC17 Proline up
24 VPApC37 Aspartic Acid up
25 VPApC52 3-Sulfinoalanine up
26 VPApC172 PC(36:4) up
27 VPApC159 ADP-Glucose up
28 VPApC41 O-Phosphoethanolamine up
29 VPApC56 Methyl-L-Lysine up
30 VPApC90 Cystathionine up
31 VPApC105 Thiamine up
32 VPApC171 PC(35:2) up
vpa_div_cntrl
1 0.8020385
2 0.7628304
3 0.7304178
4 0.6439406
5 0.6390530
6 0.6253119
7 0.6142356
8 0.6080481
9 0.5521731
10 0.4588743
11 0.4538572
12 0.4491053
13 0.4414234
14 0.4391555
15 0.4284642
16 0.4242119
17 0.4078264
18 0.4012096
19 0.3819708
20 0.3793175
21 0.3466167
22 0.1517526
23 1.8933351
24 1.5849782
25 1.5334580
26 1.4855587
27 1.4155297
28 1.3495668
29 1.3212269
30 1.2885595
31 1.2460124
32 1.2131600
#write_csv(path = "./results/vpa_cell_pos_top_hits_w_FC_pval.csv", vpa.cell.pos.top.w.info)
vpa.cell.pos.gathered <- vpa.cell.pos.noNA %>%
filter(Group == "sample") %>%
bind_cols(vpa.cell.pos.surr.var) %>%
select(Samples, Treatment, S1:S2, starts_with("VPApC")) %>%
gather(key = "Compound", value = "Abundance", VPApC1:VPApC98)
vpa.cell.pos.nested <- vpa.cell.pos.gathered %>%
group_by(Compound) %>%
nest() %>%
mutate(model = map(data, ~lm(Abundance ~ S1 + S2, data = .))) %>%
mutate(augment_model = map(model, augment))
vpa.cell.pos.modSV.resid <- vpa.cell.pos.nested %>%
unnest(data, augment_model) %>%
select(Samples, Treatment, Compound, .resid) %>%
spread(Compound, .resid)
vpa.cell.pos.modSV.resid %>%
select(Samples, one_of(vpa.cell.pos.top.w.info$compound_short)) %>%
HeatmapPrepAlt("VPApC") %>%
t() %>%
heatmaply(
colors = viridis(n = 10, option = "magma"),
xlab = "Samples", ylab = "Compounds",
main = "Statistically significant compounds\nVPA-Only Experiment / Cells / Positive Mode",
margins = c(50, 50, 75, 30),
labRow = vpa.cell.pos.top.w.info$compound_full,
k_row = 2, k_col = 2
)
### PCA - cleaned data ###
vpa.cell.pos.modSV.pca <- vpa.cell.pos.modSV.resid %>%
select(starts_with("VPApC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
vpa.cell.pos.modSV.pca.x <- as.data.frame(vpa.cell.pos.modSV.pca$x)
row.names(vpa.cell.pos.modSV.pca.x) <- vpa.cell.pos.modSV.resid$Samples
vpa.cell.pos.modSV.pca.x <- vpa.cell.pos.modSV.pca.x %>%
bind_cols(vpa.cell.pos.modSV.resid %>% select(Treatment))
vpa.cell.pos.modSV.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (65.4% Var)") +
ylab("PC2 (12.4% Var)")
vpa.cell.pos.modSV.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (6.5% Var)") +
ylab("PC4 (4.5% Var)")
vpa.med.neg.smpl.data <- vpa.med.neg.noNA %>%
filter(Group == "sample")
vpa.med.neg.data.for.sva <- as.matrix(
vpa.med.neg.smpl.data[, which(
colnames(vpa.med.neg.smpl.data) == "VPAnM1"
):ncol(vpa.med.neg.smpl.data)]
)
row.names(vpa.med.neg.data.for.sva) <- vpa.med.neg.smpl.data$Samples
vpa.med.neg.data.for.sva <- t(vpa.med.neg.data.for.sva)
vpa.med.neg.data.pheno <- as.data.frame(vpa.med.neg.smpl.data[, 5:7])
row.names(vpa.med.neg.data.pheno) <- vpa.med.neg.smpl.data$Samples
vpa.med.neg.mod.vpa <- model.matrix(~ as.factor(Treatment), data = vpa.med.neg.data.pheno)
vpa.med.neg.mod0 <- model.matrix(~ 1, data = vpa.med.neg.data.pheno)
set.seed(2018)
num.sv(vpa.med.neg.data.for.sva, vpa.med.neg.mod.vpa, method = "be")
[1] 1
set.seed(2018)
num.sv(vpa.med.neg.data.for.sva, vpa.med.neg.mod.vpa, method = "leek")
[1] 0
set.seed(2018)
vpa.med.neg.sv <- sva(vpa.med.neg.data.for.sva, vpa.med.neg.mod.vpa, vpa.med.neg.mod0)
Number of significant surrogate variables is: 1
Iteration (out of 5 ):1 2 3 4 5
vpa.med.neg.surr.var <- as.data.frame(vpa.med.neg.sv$sv)
colnames(vpa.med.neg.surr.var) <- c("S1")
vpa.med.neg.covar <- vpa.med.neg.smpl.pca.x %>%
select(Samples, PC1:PC4) %>%
inner_join(
cbind(vpa.med.neg.data.pheno, vpa.med.neg.surr.var) %>%
rownames_to_column("Samples"),
by = "Samples"
) %>%
full_join(vpa.cell.other.info, by = "Samples") %>%
as_tibble()
vpa.med.neg.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
ggplot(aes(exp_plate, S1, color = exp_plate)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1)
vpa.med.neg.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
ggplot(aes(exp_plate, S1)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1, aes(color = Treatment))
vpa.med.neg.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
select(Samples:exp_plate) %>%
gather("PC", "value", PC1:PC4) %>%
ggplot(aes(exp_plate, value)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1, aes(color = Treatment)) +
facet_wrap(~ PC, scales = "free")
vpa.med.neg.covar %>%
select(PC1:PC4, S1) %>%
ggcorr(label = TRUE)
vpa.med.neg.covar %>%
select(PC1:PC4, S1) %>%
ggpairs()
vpa.med.neg.covar %>%
select(PC1:PC4, prot_conc_ug_uL:run_order) %>%
ggcorr(label = TRUE)
vpa.med.neg.covar %>%
select(PC1:PC4, prot_conc_ug_uL:run_order) %>%
ggpairs()
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing non-finite values (stat_density).
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing non-finite values (stat_density).
vpa.med.neg.covar %>%
ggplot(aes(run_order, PC3, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
Warning: Removed 1 rows containing missing values (geom_point).
vpa.med.neg.covar %>%
ggplot(aes(prot_conc_ug_uL, PC2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
Warning: Removed 1 rows containing missing values (geom_point).
vpa.med.neg.covar %>%
select(S1:run_order) %>%
ggcorr(label = TRUE)
vpa.med.neg.covar %>%
select(S1:run_order) %>%
ggpairs()
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing non-finite values (stat_density).
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing non-finite values (stat_density).
vpa.med.neg.covar %>%
ggplot(aes(run_order, S1, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
Warning: Removed 1 rows containing missing values (geom_point).
vpa.med.neg.covar %>%
ggplot(aes(prot_conc_ug_uL, S1, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
Warning: Removed 1 rows containing missing values (geom_point).
vpa.med.neg.pca.var <- data.frame(vpa.med.neg.smpl.pca.x$PC3)
colnames(vpa.med.neg.pca.var) <- "PC3"
setequal(row.names(vpa.med.neg.smpl.pca.x), row.names(vpa.med.neg.mod.vpa))
[1] TRUE
head(vpa.med.neg.pca.var)
PC3
1 2.0980413
2 1.2398362
3 0.9684519
4 1.3396242
5 1.0965746
6 0.8120326
colnames(vpa.med.neg.mod.vpa) <- c("cntrl", "DRUGvsCNTRL")
vpa.med.neg.d.sv <- cbind(vpa.med.neg.mod.vpa, vpa.med.neg.pca.var)
head(vpa.med.neg.d.sv)
cntrl DRUGvsCNTRL PC3
T1_P1_C1 1 0 2.0980413
T1_P1_C2 1 0 1.2398362
T1_P1_C3 1 0 0.9684519
T1_P1_V1 1 1 1.3396242
T1_P1_V2 1 1 1.0965746
T1_P1_V3 1 1 0.8120326
vpa.med.neg.top.table <- vpa.med.neg.data.for.sva %>%
lmFit(vpa.med.neg.d.sv) %>%
eBayes() %>%
topTable(coef = "DRUGvsCNTRL", adjust = "bonferroni", p.value = 0.05, n = nrow(vpa.med.neg.data.for.sva))
vpa.med.neg.top.w.info <- vpa.med.neg.top.table %>%
rownames_to_column("compound_short") %>%
mutate(
vpa_div_cntrl = 2 ^ logFC,
change_in_vpa = ifelse(vpa_div_cntrl < 1, "down", "up")
) %>%
inner_join(vpa.med.neg.compound.info, by = "compound_short") %>%
filter(vpa_div_cntrl > 1.2 | vpa_div_cntrl < 0.83) %>%
arrange(change_in_vpa, desc(vpa_div_cntrl))
vpa.med.neg.top.w.info %>%
select(compound_short, compound_full, change_in_vpa, vpa_div_cntrl)
compound_short compound_full change_in_vpa vpa_div_cntrl
1 VPAnM24 Valproic acid up 35.71479
#write_csv(path = "./results/vpa_med_neg_top_hits_w_FC_pval_sv_mod.csv", vpa.med.neg.top.w.info)
vpa.med.neg.gathered <- vpa.med.neg.noNA %>%
filter(Group == "sample") %>%
bind_cols(vpa.med.neg.pca.var) %>%
select(Samples, Treatment, PC3, starts_with("VPAnM")) %>%
gather(key = "Compound", value = "Abundance", VPAnM1:VPAnM9)
vpa.med.neg.nested <- vpa.med.neg.gathered %>%
group_by(Compound) %>%
nest() %>%
mutate(model = map(data, ~lm(Abundance ~ PC3, data = .))) %>%
mutate(augment_model = map(model, augment))
vpa.med.neg.modSV.resid <- vpa.med.neg.nested %>%
unnest(data, augment_model) %>%
select(Samples, Treatment, Compound, .resid) %>%
spread(Compound, .resid)
vpa.med.neg.modSV.resid %>%
select(Samples:Treatment, one_of(vpa.med.neg.top.w.info$compound_short)) %>%
ggplot(aes(Treatment, VPAnM24)) +
geom_boxplot() +
geom_jitter(size = 3, alpha = 0.8, aes(color = Treatment))
### PCA - cleaned data ###
vpa.med.neg.modSV.pca <- vpa.med.neg.modSV.resid %>%
select(starts_with("VPAnM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
vpa.med.neg.modSV.pca.x <- as.data.frame(vpa.med.neg.modSV.pca$x)
row.names(vpa.med.neg.modSV.pca.x) <- vpa.med.neg.modSV.resid$Samples
vpa.med.neg.modSV.pca.x <- vpa.med.neg.modSV.pca.x %>%
bind_cols(vpa.med.neg.modSV.resid %>% select(Treatment))
vpa.med.neg.modSV.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (63.4% Var)") +
ylab("PC2 (30.0% Var)")
vpa.med.pos.smpl.data <- vpa.med.pos.noNA %>%
filter(Group == "sample")
vpa.med.pos.data.for.sva <- as.matrix(
vpa.med.pos.smpl.data[, which(
colnames(vpa.med.pos.smpl.data) == "VPApM1"
):ncol(vpa.med.pos.smpl.data)]
)
row.names(vpa.med.pos.data.for.sva) <- vpa.med.pos.smpl.data$Samples
vpa.med.pos.data.for.sva <- t(vpa.med.pos.data.for.sva)
vpa.med.pos.data.pheno <- as.data.frame(vpa.med.pos.smpl.data[, 5:7])
row.names(vpa.med.pos.data.pheno) <- vpa.med.pos.smpl.data$Samples
vpa.med.pos.mod.vpa <- model.matrix(~ as.factor(Treatment), data = vpa.med.pos.data.pheno)
vpa.med.pos.mod0 <- model.matrix(~ 1, data = vpa.med.pos.data.pheno)
set.seed(2018)
num.sv(vpa.med.pos.data.for.sva, vpa.med.pos.mod.vpa, method = "be")
[1] 1
set.seed(2018)
num.sv(vpa.med.pos.data.for.sva, vpa.med.pos.mod.vpa, method = "leek")
[1] 0
set.seed(2018)
vpa.med.pos.sv <- sva(vpa.med.pos.data.for.sva, vpa.med.pos.mod.vpa, vpa.med.pos.mod0)
Number of significant surrogate variables is: 1
Iteration (out of 5 ):1 2 3 4 5
vpa.med.pos.surr.var <- as.data.frame(vpa.med.pos.sv$sv)
colnames(vpa.med.pos.surr.var) <- c("S1")
vpa.med.pos.covar <- vpa.med.pos.smpl.pca.x %>%
select(Samples, PC1:PC4) %>%
inner_join(
cbind(vpa.med.pos.data.pheno, vpa.med.pos.surr.var) %>%
rownames_to_column("Samples"),
by = "Samples"
) %>%
full_join(vpa.cell.other.info, by = "Samples") %>%
as_tibble()
vpa.med.pos.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
ggplot(aes(exp_plate, S1, color = exp_plate)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1)
vpa.med.pos.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
ggplot(aes(exp_plate, S1)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1, aes(color = Treatment))
vpa.med.pos.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
select(Samples:exp_plate) %>%
gather("PC", "value", PC1:PC4) %>%
ggplot(aes(exp_plate, value)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1, aes(color = Treatment)) +
facet_wrap(~ PC, scales = "free")
vpa.med.pos.covar %>%
select(PC1:PC4, S1) %>%
ggcorr(label = TRUE)
vpa.med.pos.covar %>%
select(PC1:PC4, S1) %>%
ggpairs()
vpa.med.pos.covar %>%
select(PC1:PC4, prot_conc_ug_uL:run_order) %>%
ggcorr(label = TRUE)
vpa.med.pos.covar %>%
select(PC1:PC4, prot_conc_ug_uL:run_order) %>%
ggpairs()
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing non-finite values (stat_density).
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing non-finite values (stat_density).
vpa.med.pos.covar %>%
ggplot(aes(run_order, PC2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
Warning: Removed 1 rows containing missing values (geom_point).
vpa.med.pos.covar %>%
ggplot(aes(prot_conc_ug_uL, PC2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
Warning: Removed 1 rows containing missing values (geom_point).
vpa.med.pos.covar %>%
select(S1:run_order) %>%
ggcorr(label = TRUE)
vpa.med.pos.covar %>%
select(S1:run_order) %>%
ggpairs()
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing non-finite values (stat_density).
Warning in (function (data, mapping, alignPercent = 0.6, method =
"pearson", : Removing 1 row that contained a missing value
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing non-finite values (stat_density).
vpa.med.pos.covar %>%
ggplot(aes(run_order, S1, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
Warning: Removed 1 rows containing missing values (geom_point).
vpa.med.pos.covar %>%
ggplot(aes(prot_conc_ug_uL, S1, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
Warning: Removed 1 rows containing missing values (geom_point).
vpa.med.pos.pca.var <- data.frame(vpa.med.pos.smpl.pca.x$PC2)
colnames(vpa.med.pos.pca.var) <- "PC2"
setequal(row.names(vpa.med.pos.smpl.pca.x), row.names(vpa.med.pos.mod.vpa))
[1] TRUE
head(vpa.med.pos.pca.var)
PC2
1 -1.8760311
2 -1.9221080
3 -1.5321371
4 -1.0724871
5 -0.6100211
6 -1.1385369
colnames(vpa.med.pos.mod.vpa) <- c("cntrl", "DRUGvsCNTRL")
vpa.med.pos.d.sv <- cbind(vpa.med.pos.mod.vpa, vpa.med.pos.pca.var)
head(vpa.med.pos.d.sv)
cntrl DRUGvsCNTRL PC2
T1_P1_C1 1 0 -1.8760311
T1_P1_C2 1 0 -1.9221080
T1_P1_C3 1 0 -1.5321371
T1_P1_V1 1 1 -1.0724871
T1_P1_V2 1 1 -0.6100211
T1_P1_V3 1 1 -1.1385369
vpa.med.pos.top.table <- vpa.med.pos.data.for.sva %>%
lmFit(vpa.med.pos.d.sv) %>%
eBayes() %>%
topTable(coef = "DRUGvsCNTRL", adjust = "bonferroni", p.value = 0.05, n = nrow(vpa.med.pos.data.for.sva))
vpa.med.pos.top.table
data frame with 0 columns and 0 rows
Residuals for plotting:
vpa.med.pos.gathered <- vpa.med.pos.noNA %>%
filter(Group == "sample") %>%
bind_cols(vpa.med.pos.pca.var) %>%
select(Samples, Treatment, PC2, starts_with("VPApM")) %>%
gather(key = "Compound", value = "Abundance", VPApM1:VPApM9)
vpa.med.pos.nested <- vpa.med.pos.gathered %>%
group_by(Compound) %>%
nest() %>%
mutate(model = map(data, ~lm(Abundance ~ PC2, data = .))) %>%
mutate(augment_model = map(model, augment))
vpa.med.pos.modSV.resid <- vpa.med.pos.nested %>%
unnest(data, augment_model) %>%
select(Samples, Treatment, Compound, .resid) %>%
spread(Compound, .resid)
(not evaluated)
#write_csv(vpa.cell.neg.modSV.resid, path = "./results/vpa_cell_neg_modSV_resid.csv")
#write_csv(vpa.cell.pos.modSV.resid, path = "./results/modSV_resid/vpa_cell_pos_modSV_resid.csv")
#write_csv(vpa.med.neg.modSV.resid, path = "./results/modSV_resid/vpa_med_neg_modSV_resid.csv")
#write_csv(vpa.med.pos.modSV.resid, path = "./results/modSV_resid/vpa_med_pos_modSV_resid.csv")
head(vpa.cell.neg.top.w.info)
compound_short logFC AveExpr t P.Value adj.P.Val
1 VPAnC58 -0.3207540 17.48468 -4.615315 1.445584e-04 0.0258759505
2 VPAnC65 -0.3708957 20.56905 -4.826636 8.713540e-05 0.0155972362
3 VPAnC97 -0.3720153 19.32692 -7.495251 2.104994e-07 0.0000376794
4 VPAnC105 -0.3879737 15.24568 -4.509934 1.861965e-04 0.0333291767
5 VPAnC41 -0.4198513 18.18660 -6.550547 1.613484e-06 0.0002888137
6 VPAnC23 -0.4622609 18.27583 -6.245694 3.190768e-06 0.0005711475
B vpa_div_cntrl change_in_vpa compound_full
1 0.4099156 0.8006513 down N-Acetylserine
2 0.9152870 0.7733022 down Xanthine
3 6.9920292 0.7727024 down myo-Inositol
4 0.1577535 0.7642022 down N-alpha-Acetylglutamine
5 4.9304963 0.7475017 down Asparagine
6 4.2413631 0.7258479 down Thiosulfate
formula mass rt cas_id
1 C5 H9 N O4 147.054 3.38 16354-58-8
2 C5 H4 N4 O2 152.034 1.92 69-89-6
3 C6 H12 O6 180.064 5.02 87-89-8
4 C7 H12 N2 O4 188.079 5.83 2490-97-3
5 C4 H8 N2 O3 132.054 8.37 70-47-3
6 H2 O3 S2 113.945 4.85 14383-50-7
head(vpa.cell.pos.top.w.info)
compound_short logFC AveExpr t P.Value adj.P.Val
1 VPApC35 -0.3182566 18.67222 -4.853699 7.687773e-05 0.010916637
2 VPApC40 -0.3905657 15.41180 -4.621143 1.350775e-04 0.019181004
3 VPApC51 -0.4532062 16.91997 -4.489349 1.860706e-04 0.026422023
4 VPApC101 -0.6350004 14.04687 -5.753222 8.991813e-06 0.001276837
5 VPApC24 -0.6459926 19.38535 -4.275739 3.128901e-04 0.044430397
6 VPApC49 -0.6773521 14.61980 -4.311187 2.870291e-04 0.040758128
B vpa_div_cntrl change_in_vpa compound_full
1 0.58407812 0.8020385 down Asparagine
2 0.01844396 0.7628304 down N-Methylnicotinate
3 -0.30211613 0.7304178 down Xanthine
4 2.75153845 0.6439406 down Glycerol 1-phosphoserine
5 -0.82081936 0.6390530 down Nicotinamide
6 -0.73485577 0.6253119 down D-Ribose
formula mass rt cas_id
1 C4 H8 N2 O3 132.054 8.39 70-47-3
2 C7 H7 N O2 137.048 10.39 535-83-1
3 C5 H4 N4 O2 152.034 1.92 69-89-6
4 C6 H14 N O8 P 259.045 8.00 26289-09-8
5 C6 H6 N2 O 122.048 1.90 98-92-0
6 C5 H10 O5 150.052 2.45 50-69-1
head(vpa.med.neg.top.w.info)
compound_short logFC AveExpr t P.Value adj.P.Val
1 VPAnM24 5.15845 18.25088 29.61713 1.105853e-22 4.533997e-21
B vpa_div_cntrl change_in_vpa compound_full formula mass rt
1 41.71555 35.71479 up Valproic acid C8 H16 O2 144.115 1.3
cas_id
1 99-66-1
vpa.full.hit.list <- vpa.cell.neg.top.w.info %>%
mutate(Mode = "cell.neg") %>%
bind_rows(
vpa.cell.pos.top.w.info %>%
mutate(Mode = "cell.pos")
) %>%
bind_rows(
vpa.med.neg.top.w.info %>%
mutate(Mode = "med.neg")
) %>%
as.tibble()
Warning: `as.tibble()` is deprecated, use `as_tibble()` (but mind the new semantics).
This warning is displayed once per session.
vpa.sml.hit.list <- vpa.full.hit.list %>%
select(compound_full:formula, cas_id) %>%
distinct() %>%
arrange(compound_full) %>%
left_join(cmpnd.id.db, by = "cas_id")
all.equal(vpa.sml.hit.list$compound_full.x, vpa.sml.hit.list$compound_full.y)
[1] TRUE
#write_csv(path = "./results/vpa_only_exp_hits_w_kegg_sv_mod.csv", vpa.sml.hit.list)
glimpse(vpa.sml.hit.list)
Observations: 58
Variables: 7
$ compound_full.x <chr> "11Z,14Z-Eicosadienoic Acid (20:2 n-6)", "2-Am...
$ formula <chr> "C20 H36 O2", "C6 H11 N O4", "C3 H7 N O4 S", "...
$ cas_id <chr> "2091-39-6", "542-32-5", "1115-65-7", "73-24-5...
$ compound_full.y <chr> "11Z,14Z-Eicosadienoic Acid (20:2 n-6)", "2-Am...
$ HMDB <chr> "HMDB05060", "HMDB00510", "HMDB00996", "HMDB00...
$ Metlin <chr> "62964", "3271", "5927", "85", "34522", "63203...
$ KEGG <chr> "C16525", "C00956", "C00606", "C00147", "C0000...
listDatabases()
path <- keggList("pathway")
org <- keggList("organism")
compound <- keggList("compound")
c(listDatabases(), org[,1], org[,2])
ATP <- keggGet("cpd:C00002")
nucleotide.metab <- read_csv("./data/pathways/vpa_only_exp_nucleotide_hits.csv")
Parsed with column specification:
cols(
compound_full.x = col_character(),
formula = col_character(),
cas_id = col_character(),
HMDB = col_character(),
Metlin = col_double(),
KEGG = col_character(),
path1 = col_character(),
path2 = col_character(),
path3 = col_character()
)
### Purine Metabolism ###
purine.triphosphates <- c("ATP", "GTP", "dATP", "dGTP")
purine.for.plot <- nucleotide.metab %>%
inner_join(vpa.full.hit.list, by = "cas_id") %>%
filter(path1 == "Purine" | path2 == "Purine" | path3 == "Purine") %>%
select(compound_full.x, compound_short) %>%
rename(compound_full = compound_full.x) %>%
bind_rows(
vpa.cell.neg.compound.info %>%
filter(compound_full %in% purine.triphosphates) %>%
select(compound_full, compound_short)
) %>%
bind_rows(
vpa.cell.pos.compound.info %>%
filter(compound_full %in% purine.triphosphates) %>%
select(compound_full, compound_short)
)
#write_csv(path = "./data/pathways/purine_pathway.csv", purine.for.plot) do not run
purine.plot.order <- read_csv("./data/pathways/purine_pathway.csv") %>%
mutate(plot_order = factor(Name, levels = Name))
Parsed with column specification:
cols(
compound_full = col_character(),
compound_short = col_character(),
Name = col_character(),
Sig = col_character()
)
purine.data <- vpa.cell.neg.modSV.resid %>%
inner_join(vpa.cell.pos.modSV.resid %>% select(-Treatment), by = "Samples") %>%
select(Samples:Treatment, one_of(purine.for.plot$compound_short)) %>%
mutate_at(vars(matches("VPA")), scale)
purine.sig.plot <- purine.data %>%
gather(key = "compound_short", value = "Abundance", -Samples, -Treatment) %>%
inner_join(
purine.plot.order %>%
filter(compound_full != "Aspartic Acid" & compound_full != "Ribose 5-Phosphate"),
by = "compound_short"
) %>%
filter(Sig == "Y") %>%
ggplot(aes(plot_order, Abundance, color = Treatment)) +
geom_boxplot(position = position_dodge(0.8)) +
geom_jitter(size = 2, alpha = 0.5, position = position_dodge(0.8)) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
panel.background = element_rect(fill = "gray90")
) +
xlab("") +
ylab("") +
# ggtitle("Normalized Abundance by Treatment Group\nPyrimidine Metabolism\nStatistically Significant Compounds") +
scale_color_manual(values = c("#56B4E9", "#E69F00"), labels = c("Control", "VPA")) +
ylim(-2.5, 2.5)
Warning: attributes are not identical across measure variables;
they will be dropped
purine.sig.plot
purine.not.sig.plot <- purine.data %>%
gather(key = "compound_short", value = "Abundance", -Samples, -Treatment) %>%
inner_join(purine.plot.order, by = "compound_short") %>%
filter(Sig == "N") %>%
ggplot(aes(plot_order, Abundance, color = Treatment)) +
geom_boxplot(position = position_dodge(0.8)) +
geom_jitter(size = 2, alpha = 0.5, position = position_dodge(0.8)) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
panel.background = element_rect(fill = "gray90")
) +
xlab("") +
ylab("") +
#ggtitle("Normalized Abundance by Treatment Group\nPyrimidine Metabolism\nNot Statistically Significant Compounds") +
scale_color_manual(values = c("#56B4E9", "#E69F00"), labels = c("Control", "VPA")) +
ylim(-2.5, 2.5)
Warning: attributes are not identical across measure variables;
they will be dropped
purine.not.sig.plot
### Pyrimidine Metabolism ###
pyrimidine.triphosphates <- c("UTP", "CTP", "dTTP", "dCTP")
pyrimidine.for.plot <- nucleotide.metab %>%
inner_join(vpa.full.hit.list, by = "cas_id") %>%
filter(path1 == "Pyrimidine" | path2 == "Pyrimidine" | path3 == "Pyrimidine") %>%
select(compound_full.x, compound_short) %>%
rename(compound_full = compound_full.x) %>%
bind_rows(
vpa.cell.neg.compound.info %>%
filter(compound_full %in% pyrimidine.triphosphates) %>%
select(compound_full, compound_short)
) %>%
bind_rows(
vpa.cell.pos.compound.info %>%
filter(compound_full %in% pyrimidine.triphosphates) %>%
select(compound_full, compound_short)
) %>%
distinct()
#write_csv(path = "./data/pathways/pyrimidine_pathway.csv", pyrimidine.for.plot)
pyrimidine.plot.order <- read_csv("./data/pathways/pyrimidine_pathway.csv") %>%
mutate(plot_order = factor(Name, levels = Name))
Parsed with column specification:
cols(
compound_full = col_character(),
compound_short = col_character(),
Name = col_character(),
Sig = col_character()
)
pyrimidine.data <- vpa.cell.neg.modSV.resid %>%
inner_join(vpa.cell.pos.modSV.resid %>% select(-Treatment), by = "Samples") %>%
select(Samples:Treatment, one_of(pyrimidine.for.plot$compound_short)) %>%
mutate_at(vars(matches("VPA")), scale)
pyrimidine.sig.plot <- pyrimidine.data %>%
gather(key = "compound_short", value = "Abundance", -Samples, -Treatment) %>%
inner_join(
pyrimidine.plot.order %>%
filter(compound_full != "Ribose 5-Phosphate"), by = "compound_short") %>%
filter(Sig == "Y") %>%
ggplot(aes(plot_order, Abundance, color = Treatment)) +
geom_boxplot(position = position_dodge(0.8)) +
geom_jitter(size = 2, alpha = 0.5, position = position_dodge(0.8)) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
panel.background = element_rect(fill = "gray90")
) +
xlab("") +
ylab("") +
#ggtitle("Normalized Abundance by Treatment Group\nPyrimidine Metabolism\nStatistically Significant Compounds") +
scale_color_manual(values = c("#56B4E9", "#E69F00"), labels = c("Control", "VPA")) +
ylim(-2.5, 2.5)
Warning: attributes are not identical across measure variables;
they will be dropped
pyrimidine.sig.plot
pyrimidine.not.sig.plot <- pyrimidine.data %>%
gather(key = "compound_short", value = "Abundance", -Samples, -Treatment) %>%
inner_join(pyrimidine.plot.order, by = "compound_short") %>%
filter(Sig == "N") %>%
ggplot(aes(plot_order, Abundance, color = Treatment)) +
geom_boxplot(position = position_dodge(0.8)) +
geom_jitter(size = 2, alpha = 0.5, position = position_dodge(0.8)) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
panel.background = element_rect(fill = "gray90")
) +
xlab("") +
ylab("") +
#ggtitle("Normalized Abundance by Treatment Group\nPyrimidine Metabolism\nNot Statistically Significant Compounds") +
scale_color_manual(values = c("#56B4E9", "#E69F00"), labels = c("Control", "VPA")) +
ylim(-2.5, 2.5)
Warning: attributes are not identical across measure variables;
they will be dropped
pyrimidine.not.sig.plot
### Pentose Phosphate Pathway ###
ppp.for.plot <- nucleotide.metab %>%
inner_join(vpa.full.hit.list, by = "cas_id") %>%
filter(path1 == "PPP" | path2 == "PPP" | path3 == "PPP") %>%
select(compound_full.x, compound_short) %>%
rename(compound_full = compound_full.x)
#write_csv(path = "./data/pathways/ppp_pathway.csv", ppp.for.plot)
ppp.plot.order <- read_csv("./data/pathways/ppp_pathway.csv") %>%
mutate(plot_order = factor(Name, levels = Name))
Parsed with column specification:
cols(
compound_full = col_character(),
compound_short = col_character(),
Name = col_character(),
Sig = col_character()
)
ppp.data <- vpa.cell.neg.modSV.resid %>%
inner_join(vpa.cell.pos.modSV.resid %>% select(-Treatment), by = "Samples") %>%
select(Samples:Treatment, one_of(ppp.for.plot$compound_short)) %>%
mutate_at(vars(matches("VPA")), scale)
ppp.sig.plot <- ppp.data %>%
gather(key = "compound_short", value = "Abundance", -Samples, -Treatment) %>%
inner_join(ppp.plot.order, by = "compound_short") %>%
ggplot(aes(plot_order, Abundance, color = Treatment)) +
geom_boxplot(position = position_dodge(0.8)) +
geom_jitter(size = 2, alpha = 0.5, position = position_dodge(0.8)) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
panel.background = element_rect(fill = "gray90"),
legend.justification = "top"
) +
xlab("") +
ylab("") +
#ggtitle("Normalized Abundance by Treatment Group\nppp Metabolism\nStatistically Significant Compounds") +
scale_color_manual(values = c("#56B4E9", "#E69F00"), labels = c("Control", "VPA")) +
ylim(-2.75, 2.5) +
scale_x_discrete(labels = c("Glucose\n6-Phosphate", "Ribose", "Deoxyribose", "Glyceraldehyde\n3-Phosphate (Neg)", "Glyceraldehyde\n3-Phosphate (Pos)", "Ribose\n5-Phosphate", "Sedoheptulose\n7-Phosphate"))
Warning: attributes are not identical across measure variables;
they will be dropped
ppp.sig.plot
### all together
nuc.legend <- get_legend(ppp.sig.plot)
vpa.only.nuc.sig.plot.grid <- plot_grid(
purine.sig.plot + theme(legend.position = "none", plot.margin = unit(c(6,6,0,0), "pt")),
pyrimidine.sig.plot + theme(legend.position = "none", plot.margin = unit(c(0,6,0,0), "pt")),
plot_grid(
ppp.sig.plot + theme(legend.position = "none", plot.margin = unit(c(0,6,0,0), "pt")),
nuc.legend,
rel_widths = c(1.45, 0.55)
),
ncol = 1, labels = c("A", "B", "C"),
rel_heights = c(1, 1, 1)
)
vpa.only.nuc.sig.plot.grid
#save_plot("./results/vpa_only_exp_nuc_sig.png", vpa.only.nuc.sig.plot.grid, base_height = 10, base_width = 8)
nuc.not.sig.row.plots <- plot_grid(
purine.not.sig.plot,
pyrimidine.not.sig.plot,
labels = c("A", "B"),
ncol = 1
)
nuc.not.sig.row.plots
#save_plot("./results/vpa_only_exp_nuc_not_sig.png", nuc.not.sig.row.plots, base_width = 8, base_height = 8)
Towards the end to avoid package issues
#"#56B4E9", "#E69F00"
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
vpa.cell.col.colors <- c(
rep("#56B4E9", 3), rep("#E69F00", 3),
rep("#56B4E9", 2), rep("#E69F00", 3),
rep("#56B4E9", 3), rep("#E69F00", 3),
rep("#56B4E9", 3), rep("#E69F00", 3)
)
vpa.cell.neg.top.w.info <- vpa.cell.neg.top.w.info %>%
mutate(
cmpnd_full_mod = ifelse(
compound_full == "11Z,14Z-Eicosadienoic Acid (20:2 n-6)",
"Eicosadienoic Acid (20:2)",
ifelse(
compound_full == "Valproic acid",
"Valproic Acid",
ifelse(
compound_full == "G3P",
"Glyceraldehyde 3-Phosphate",
ifelse(
compound_full == "D-Glucose 6-phosphate",
"Glucose 6-Phosphate",
ifelse(
compound_full == "N-alpha-Acetylglutamine",
"N-Acetylglutamine",
compound_full
)
)
)
)
)
)
vpa.cell.neg.modSV.resid %>%
select(Samples, one_of(vpa.cell.neg.top.w.info$compound_short)) %>%
HeatmapPrepAlt("VPAnC") %>%
t() %>%
heatmap.2(
col = viridis(n = 10, option = "A"),
trace = "none",
ColSideColors = vpa.cell.col.colors,
labRow = vpa.cell.neg.top.w.info$cmpnd_full_mod,
margins = c(2, 13),
dendrogram = "column",
symbreaks = FALSE,
density.info = "none",
key.xlab = "Scaled Value",
labCol = NA
)
vpa.cell.neg.resid.for.profile <- vpa.cell.neg.modSV.resid %>%
select(Samples:Treatment, one_of(vpa.cell.neg.top.w.info$compound_short))
vpa.cell.neg.resid.for.profile %>%
gather("Compound", value = "Abundance", -c(Samples, Treatment)) %>%
group_by(Samples) %>%
count() %>%
filter(n != 41)
## # A tibble: 0 x 2
## # Groups: Samples [0]
## # ... with 2 variables: Samples <chr>, n <int>
vpa.cell.neg.resid.order <- vpa.cell.neg.resid.for.profile %>%
gather("Compound", value = "Abundance", -c(Samples, Treatment)) %>%
group_by(Compound, Treatment) %>%
summarize(avg.abun = mean(Abundance)) %>%
ungroup() %>%
spread(key = "Treatment", value = "avg.abun") %>%
mutate(FC = vpa - cntrl) %>%
arrange(FC) %>%
mutate(compound_sort = factor(Compound)) %>%
inner_join(vpa.cell.neg.top.w.info, by = c("Compound" = "compound_short"))
vpa.cell.neg.modSV.resid.prof.plot <- vpa.cell.neg.resid.for.profile %>%
gather("Compound", value = "Abundance", -c(Samples, Treatment)) %>%
mutate(compound_order = factor(Compound, levels = vpa.cell.neg.resid.order$compound_sort)) %>%
ggplot(aes(compound_order, Abundance, color = Treatment, group = Samples)) +
geom_line(alpha = 0.3, size = 1.25) +
theme_minimal() +
theme(
axis.ticks.x = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
) +
scale_x_discrete(name = "Compound", labels = vpa.cell.neg.resid.order$cmpnd_full_mod) +
scale_color_manual(values = c("#56B4E9","#E69F00"), labels = c("Control", "VPA")) +
ylab("log2(Compound Resid)") +
# overlay the group averages
geom_line(data = vpa.cell.neg.resid.order %>% gather("Treatment", "treat_mean", cntrl:vpa), aes(compound_sort, treat_mean, color = Treatment, group = Treatment),
size = 2
)
vpa.cell.neg.resid.clust <- vpa.cell.neg.resid.for.profile %>%
mutate_at(vars(starts_with("VPAnC")), scale, center = TRUE, scale = TRUE) %>%
select(-Treatment) %>%
as.data.frame() %>%
column_to_rownames("Samples") %>%
as.matrix() %>%
dist() %>%
hclust()
vpa.cell.neg.modSV.resid$Samples[vpa.cell.neg.resid.clust$order]
## [1] "T1_P1_V2" "T1_P1_V3" "T1_P1_V1" "T1_P2_V3" "T1_P2_V2" "T2_P2_V3"
## [7] "T2_P1_V2" "T2_P2_V2" "T2_P1_V1" "T2_P2_V1" "T1_P2_V1" "T2_P1_V3"
## [13] "T1_P1_C1" "T1_P2_C1" "T1_P1_C3" "T1_P1_C2" "T1_P2_C2" "T2_P2_C1"
## [19] "T2_P1_C3" "T2_P1_C2" "T2_P2_C2" "T2_P1_C1" "T2_P2_C3"
vpa.cell.neg.modSV.resid.heatmap <- vpa.cell.neg.resid.for.profile %>%
mutate_at(vars(starts_with("VPAnC")), scale, center = TRUE, scale = TRUE) %>%
gather("Compound", "scaled_abun", -c(Samples:Treatment)) %>%
mutate(compound_order = factor(Compound, levels = vpa.cell.neg.resid.order$compound_sort)) %>%
mutate(sample_order = factor(Samples, levels = vpa.cell.neg.modSV.resid$Samples[vpa.cell.neg.resid.clust$order])) %>%
mutate(scaled_abun = cut(scaled_abun, breaks = seq(-3, 3, by = 0.5), right = FALSE)) %>%
ggplot(aes(compound_order, Samples, fill = scaled_abun)) +
geom_raster() +
scale_fill_brewer(palette = "RdBu") +
#scale_fill_viridis_d(option = "A") +
theme_minimal() +
scale_x_discrete(name = "Compound", labels = vpa.cell.neg.resid.order$cmpnd_full_mod) +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
) +
facet_wrap(~ Treatment, ncol = 1, scales = "free")
## Warning: attributes are not identical across measure variables;
## they will be dropped
vpa.cell.neg.heatmap.grid <- plot_grid(
vpa.cell.neg.modSV.resid.prof.plot,
vpa.cell.neg.modSV.resid.heatmap,
ncol = 1,
labels = c("A", "B"),
rel_heights = c(1, 1.5)
)
vpa.cell.neg.heatmap.grid
#save_plot("./results/vpa_cell_neg_heatmap_grid.png", vpa.cell.neg.heatmap.grid, base_height = 10, base_width = 7)
sessionInfo()
R version 3.5.2 (2018-12-20)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252
[2] LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] gplots_3.0.1 ggcorrplot_0.1.2 bindrcpp_0.2.2
[4] ggthemes_4.0.1 KEGGREST_1.22.0 ggridges_0.5.1
[7] broom_0.5.1 limma_3.38.3 sva_3.30.0
[10] BiocParallel_1.16.2 genefilter_1.64.0 mgcv_1.8-26
[13] nlme_3.1-137 heatmaply_0.15.2 viridis_0.5.1
[16] viridisLite_0.3.0 plotly_4.8.0 GGally_1.4.0
[19] cowplot_0.9.4 forcats_0.3.0 stringr_1.3.1
[22] dplyr_0.7.8 purrr_0.2.5 readr_1.3.1
[25] tidyr_0.8.2 tibble_2.0.1 ggplot2_3.1.0
[28] tidyverse_1.2.1
loaded via a namespace (and not attached):
[1] readxl_1.2.0 backports_1.1.3 plyr_1.8.4
[4] lazyeval_0.2.1 splines_3.5.2 crosstalk_1.0.0
[7] digest_0.6.18 foreach_1.4.4 htmltools_0.3.6
[10] gdata_2.18.0 fansi_0.4.0 magrittr_1.5
[13] memoise_1.1.0 cluster_2.0.7-1 gclus_1.3.2
[16] Biostrings_2.50.1 annotate_1.60.0 modelr_0.1.2
[19] matrixStats_0.54.0 colorspace_1.4-0 blob_1.1.1
[22] rvest_0.3.2 haven_2.0.0 xfun_0.4
[25] crayon_1.3.4 RCurl_1.95-4.11 jsonlite_1.6
[28] bindr_0.1.1 survival_2.43-3 iterators_1.0.10
[31] glue_1.3.0 registry_0.5 gtable_0.2.0
[34] zlibbioc_1.28.0 XVector_0.22.0 webshot_0.5.1
[37] kernlab_0.9-27 prabclus_2.2-7 BiocGenerics_0.28.0
[40] DEoptimR_1.0-8 scales_1.0.0 mvtnorm_1.0-8
[43] DBI_1.0.0 Rcpp_1.0.0 xtable_1.8-3
[46] bit_1.1-14 mclust_5.4.2 stats4_3.5.2
[49] htmlwidgets_1.3 httr_1.4.0 RColorBrewer_1.1-2
[52] fpc_2.1-11.1 modeltools_0.2-22 pkgconfig_2.0.2
[55] reshape_0.8.8 XML_3.98-1.16 flexmix_2.3-14
[58] nnet_7.3-12 utf8_1.1.4 tidyselect_0.2.5
[61] labeling_0.3 rlang_0.3.1 reshape2_1.4.3
[64] later_0.7.5 AnnotationDbi_1.44.0 munsell_0.5.0
[67] cellranger_1.1.0 tools_3.5.2 cli_1.0.1
[70] generics_0.0.2 RSQLite_2.1.1 evaluate_0.12
[73] yaml_2.2.0 knitr_1.21 bit64_0.9-7
[76] robustbase_0.93-3 caTools_1.17.1.1 dendextend_1.9.0
[79] mime_0.6 whisker_0.3-2 xml2_1.2.0
[82] compiler_3.5.2 rstudioapi_0.9.0 png_0.1-7
[85] stringi_1.2.4 lattice_0.20-38 trimcluster_0.1-2.1
[88] Matrix_1.2-15 pillar_1.3.1 data.table_1.12.0
[91] bitops_1.0-6 seriation_1.2-3 httpuv_1.4.5.1
[94] R6_2.3.0 promises_1.0.1 TSP_1.1-6
[97] KernSmooth_2.23-15 gridExtra_2.3 IRanges_2.16.0
[100] codetools_0.2-16 MASS_7.3-51.1 gtools_3.8.1
[103] assertthat_0.2.0 withr_2.1.2 S4Vectors_0.20.1
[106] diptest_0.75-7 parallel_3.5.2 hms_0.4.2
[109] grid_3.5.2 class_7.3-15 rmarkdown_1.11
[112] Biobase_2.42.0 shiny_1.2.0 lubridate_1.7.4